summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c2
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c15
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c74
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c402
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.h15
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c42
-rw-r--r--drivers/gpu/drm/i915/gt/hsw_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c58
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.c63
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c230
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.h34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h90
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c113
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c909
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h78
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c83
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool.c)114
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h)15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c102
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1355
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c42
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c198
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c474
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c47
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c191
-rw-r--r--drivers/gpu/drm/i915/gt/ivb_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c30
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c1471
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c53
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c298
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c1331
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.h17
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c173
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h23
-rw-r--r--drivers/gpu/drm/i915/gt/st_shmem_utils.c63
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c539
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c72
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h22
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c97
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c124
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h19
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c60
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c88
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h63
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c67
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h21
94 files changed, 8810 insertions, 1552 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
index 6a5e9ab20b94..5e3725e62241 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -32,5 +32,5 @@ void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
{ "engines", &engines_fops },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
index 75255aaacaed..1de5fbaa1cf9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -9,6 +9,7 @@
#include "debugfs_engines.h"
#include "debugfs_gt.h"
#include "debugfs_gt_pm.h"
+#include "uc/intel_uc_debugfs.h"
#include "i915_drv.h"
void debugfs_gt_register(struct intel_gt *gt)
@@ -24,17 +25,19 @@ void debugfs_gt_register(struct intel_gt *gt)
debugfs_engines_register(gt, root);
debugfs_gt_pm_register(gt, root);
+
+ intel_uc_debugfs_register(&gt->uc, root);
}
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count)
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data)
{
while (count--) {
- if (!files->eval || files->eval(gt))
+ umode_t mode = files->fops->write ? 0644 : 0444;
+ if (!files->eval || files->eval(data))
debugfs_create_file(files->name,
- 0444, root, gt,
+ mode, root, data,
files->fops);
files++;
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
index 4ea0f06cda8f..f77540f727e9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.h
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -28,12 +28,11 @@ void debugfs_gt_register(struct intel_gt *gt);
struct debugfs_gt_file {
const char *name;
const struct file_operations *fops;
- bool (*eval)(const struct intel_gt *gt);
+ bool (*eval)(void *data);
};
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count);
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data);
#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
index 059c9e5c002e..174a24553322 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -10,6 +10,7 @@
#include "debugfs_gt_pm.h"
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_llc.h"
#include "intel_rc6.h"
#include "intel_rps.h"
@@ -268,7 +269,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
vlv_punit_get(i915);
freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
@@ -300,8 +301,9 @@ static int frequency_show(struct seq_file *m, void *unused)
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
- u32 rpupei, rpcurup, rpprevup;
- u32 rpdownei, rpcurdown, rpprevdown;
+ u32 rpcurupei, rpcurup, rpprevup;
+ u32 rpcurdownei, rpcurdown, rpprevdown;
+ u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
@@ -334,12 +336,19 @@ static int frequency_show(struct seq_file *m, void *unused)
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
- rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
- rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+ rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
cagf = intel_rps_read_actual_frequency(rps);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
@@ -372,7 +381,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
@@ -394,23 +403,35 @@ static int frequency_show(struct seq_file *m, void *unused)
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
- rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
- seq_printf(m, "RP CUR UP: %d (%dus)\n",
- rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%dus)\n",
- rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
+ rpcurupei,
+ intel_gt_pm_interval_to_ns(gt, rpcurupei));
+ seq_printf(m, "RP CUR UP: %d (%dns)\n",
+ rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dns)\n",
+ rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold);
-
- seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
- rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
- seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
- rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
- rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "RP UP EI: %d (%dns)\n",
+ rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+ seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n",
+ rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
+ rpcurdownei,
+ intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(gt, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(gt, rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
rps->power.down_threshold);
+ seq_printf(m, "RP DOWN EI: %d (%dns)\n",
+ rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+ seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n",
+ rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
@@ -506,8 +527,10 @@ static int llc_show(struct seq_file *m, void *data)
return 0;
}
-static bool llc_eval(const struct intel_gt *gt)
+static bool llc_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_LLC(gt->i915);
}
@@ -533,7 +556,8 @@ static int rps_boost_show(struct seq_file *m, void *data)
struct drm_i915_private *i915 = gt->i915;
struct intel_rps *rps = &gt->rps;
- seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
+ seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
@@ -553,7 +577,7 @@ static int rps_boost_show(struct seq_file *m, void *data)
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
- if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) {
struct intel_uncore *uncore = gt->uncore;
u32 rpup, rpupei;
u32 rpdown, rpdownei;
@@ -580,8 +604,10 @@ static int rps_boost_show(struct seq_file *m, void *data)
return 0;
}
-static bool rps_eval(const struct intel_gt *gt)
+static bool rps_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_RPS(gt->i915);
}
@@ -597,5 +623,5 @@ void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
{ "rps_boost", &rps_boost_fops, rps_eval },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
new file mode 100644
index 000000000000..de595b66a746
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gen7_renderclear.h"
+#include "i915_drv.h"
+#include "intel_gpu_commands.h"
+
+#define MAX_URB_ENTRIES 64
+#define STATE_SIZE (4 * 1024)
+#define GT3_INLINE_DATA_DELAYS 0x1E00
+#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
+
+struct cb_kernel { /* one clear-kernel blob, as initialised by CB_KERNEL() */
+ const void *data; /* the kernel array itself */
+ u32 size; /* sizeof() that array, i.e. its length in bytes */
+};
+
+#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) }
+
+#include "ivb_clear_kernel.c"
+static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel);
+
+#include "hsw_clear_kernel.c"
+static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel);
+
+struct batch_chunk { /* a window of the mapped batch that is filled dword by dword */
+ struct i915_vma *vma; /* vma handed to batch_init(); batch_addr() reads its node.start */
+ u32 offset; /* byte offset of this chunk from the start of the mapping */
+ u32 *start; /* first dword of the chunk */
+ u32 *end; /* write cursor: the next dword to be filled */
+ u32 max_items; /* capacity of the chunk in dwords (max_bytes / sizeof(u32)) */
+};
+
+struct batch_vals { /* sizing of the clear batch, filled in by batch_get_defaults() */
+ u32 max_primitives; /* number of MEDIA_OBJECTs emit_batch() writes */
+ u32 max_urb_entries; /* URB entry count programmed by gen7_emit_vfe_state() */
+ u32 cmd_size; /* bytes reserved for commands: max_primitives * 4096 */
+ u32 state_size; /* bytes reserved for state: STATE_SIZE */
+ u32 state_start; /* byte offset of the state chunk, equal to cmd_size */
+ u32 batch_size; /* cmd_size + state_size */
+ u32 surface_height; /* height used when filling the surface state */
+ u32 surface_width; /* width used when filling the surface state */
+ u32 scratch_size; /* surface_height * surface_width */
+ u32 max_size; /* batch_size + scratch_size; returned when no vma is passed */
+};
+
+/*
+ * Fill @bv with the sizing of the clear batch for this platform: Haswell
+ * uses the larger set of limits, everything else the smaller one. The
+ * remaining fields are derived from those limits.
+ */
+static void
+batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
+{
+	const bool is_hsw = IS_HASWELL(i915);
+
+	bv->max_primitives = is_hsw ? 280 : 128;
+	bv->max_urb_entries = is_hsw ? MAX_URB_ENTRIES : MAX_URB_ENTRIES / 2;
+	bv->surface_height = is_hsw ? 16 * 16 : 16 * 8;
+	bv->surface_width = is_hsw ? 32 * 2 * 16 : 32 * 16;
+
+	/* Layout: commands first, then state, then the scratch surface */
+	bv->cmd_size = bv->max_primitives * 4096;
+	bv->state_size = STATE_SIZE;
+	bv->state_start = bv->cmd_size;
+	bv->batch_size = bv->cmd_size + bv->state_size;
+	bv->scratch_size = bv->surface_height * bv->surface_width;
+	bv->max_size = bv->batch_size + bv->scratch_size;
+}
+
+/*
+ * Set up @bc as an empty chunk that begins @offset bytes into the mapping
+ * at @start and may hold up to @max_bytes worth of dwords.
+ */
+static void batch_init(struct batch_chunk *bc,
+		       struct i915_vma *vma,
+		       u32 *start, u32 offset, u32 max_bytes)
+{
+	u32 * const first = start + offset / sizeof(*start);
+
+	bc->vma = vma;
+	bc->offset = offset;
+	bc->max_items = max_bytes / sizeof(*start);
+
+	/* Nothing written yet: the cursor sits on the first dword */
+	bc->start = first;
+	bc->end = first;
+}
+
+/* Byte offset of @cs from the start of the mapping that @bc was carved from */
+static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
+{
+	const ptrdiff_t dwords = cs - bc->start;
+
+	return dwords * sizeof(*cs) + bc->offset;
+}
+
+/* Start address of the node backing the chunk's vma, as a u32 */
+static u32 batch_addr(const struct batch_chunk *bc)
+{
+	const struct i915_vma *vma = bc->vma;
+
+	return vma->node.start;
+}
+
+/* Append the single dword @d to @bc; the chunk must not already be full */
+static void batch_add(struct batch_chunk *bc, const u32 d)
+{
+	u32 *slot = bc->end;
+
+	GEM_BUG_ON((slot - bc->start) >= bc->max_items);
+	*slot = d;
+	bc->end = slot + 1;
+}
+
+/*
+ * Reserve @items dwords in @bc and return a pointer to the first of them.
+ * A non-zero @align first rounds the cursor up with PTR_ALIGN(), zeroing
+ * the dwords that are skipped over.
+ */
+static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items)
+{
+	u32 *map = bc->end;
+
+	if (align) {
+		u32 *aligned = PTR_ALIGN(map, align);
+
+		memset32(map, 0, aligned - map);
+		map = aligned;
+	}
+
+	bc->end = map + items;
+
+	return map;
+}
+
+/* As batch_alloc_items(), with the size given in bytes (a dword multiple) */
+static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes)
+{
+	const u32 items = bytes / sizeof(*bc->start);
+
+	GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start)));
+
+	return batch_alloc_items(bc, align, items);
+}
+
+/*
+ * Write an 8-dword surface state into @state that describes a 2D surface
+ * located @dst_offset bytes past the batch address, sized from @bv.
+ *
+ * Returns the byte offset of the surface state within the mapping.
+ */
+static u32
+gen7_fill_surface_state(struct batch_chunk *state,
+			const u32 dst_offset,
+			const struct batch_vals *bv)
+{
+#define SURFACE_2D 1
+#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define RENDER_CACHE_READ_WRITE 1
+#define SHADER_CHANNELS(r, g, b, a) \
+	(((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16))
+	u32 *ss = batch_alloc_items(state, 32, 8);
+	const u32 offset = batch_offset(state, ss);
+	const u32 height = bv->surface_height;
+	const u32 width = bv->surface_width;
+
+	ss[0] = SURFACE_2D << 29 |
+		(SURFACEFORMAT_B8G8R8A8_UNORM << 18) |
+		(RENDER_CACHE_READ_WRITE << 8);
+	ss[1] = batch_addr(state) + dst_offset;
+	ss[2] = ((height / 4 - 1) << 16) | (width / 4 - 1);
+	ss[3] = width;
+	ss[4] = 0;
+	ss[5] = 0;
+	ss[6] = 0;
+	ss[7] = SHADER_CHANNELS(4, 5, 6, 7);
+	batch_advance(state, ss + 8);
+
+	return offset;
+}
+
+/*
+ * Emit the surface state, followed by an 8-entry binding table whose first
+ * entry refers to it (relative to the start of the state chunk); the other
+ * entries are zero.
+ *
+ * Returns the byte offset of the binding table within the mapping.
+ */
+static u32
+gen7_fill_binding_table(struct batch_chunk *state,
+			const struct batch_vals *bv)
+{
+	const u32 surface_start =
+		gen7_fill_surface_state(state, bv->batch_size, bv);
+	u32 *table = batch_alloc_items(state, 32, 8);
+
+	table[0] = surface_start - state->offset;
+	memset32(table + 1, 0, 7);
+	batch_advance(state, table + 8);
+
+	return batch_offset(state, table);
+}
+
+/*
+ * Copy @size bytes of kernel @data into a freshly reserved region of @state
+ * and return the byte offset of the copy within the mapping.
+ */
+static u32
+gen7_fill_kernel_data(struct batch_chunk *state,
+		      const u32 *data,
+		      const u32 size)
+{
+	u32 *dst = batch_alloc_bytes(state, 64, size);
+
+	memcpy(dst, data, size);
+
+	return batch_offset(state, dst);
+}
+
+/*
+ * Copy @kernel into @state, emit its binding table, then lay out @count
+ * interface descriptors of 8 dwords each. Only the first descriptor is
+ * populated; the rest are left zeroed as dummies.
+ *
+ * Returns the byte offset of the first descriptor within the mapping.
+ */
+static u32
+gen7_fill_interface_descriptor(struct batch_chunk *state,
+			       const struct batch_vals *bv,
+			       const struct cb_kernel *kernel,
+			       unsigned int count)
+{
+	const u32 kernel_offset =
+		gen7_fill_kernel_data(state, kernel->data, kernel->size);
+	const u32 binding_table = gen7_fill_binding_table(state, bv);
+	u32 *idd = batch_alloc_items(state, 32, 8 * count);
+
+	/* the one real descriptor */
+	idd[0] = kernel_offset;
+	idd[1] = (1 << 7) | (1 << 13);
+	idd[2] = 0;
+	idd[3] = (binding_table - state->offset) | 1;
+	memset32(idd + 4, 0, 4);
+
+	/* the remaining count - 1 dummy descriptors */
+	memset32(idd + 8, 0, (count - 1) * 8);
+	batch_advance(state, idd + 8 + (count - 1) * 8);
+
+	return batch_offset(state, idd);
+}
+
+/*
+ * Emit a 12-dword STATE_BASE_ADDRESS packet into @batch. The general,
+ * dynamic, indirect and instruction bases are all set to the batch address;
+ * the surface base is the batch address plus @surface_state_base.
+ *
+ * @batch: command chunk to append to
+ * @surface_state_base: byte offset from the start of the batch
+ */
+static void
+gen7_emit_state_base_address(struct batch_chunk *batch,
+			     u32 surface_state_base)
+{
+	const u32 base = batch_addr(batch);
+	u32 *cs = batch_alloc_items(batch, 0, 12);
+
+	*cs++ = STATE_BASE_ADDRESS | (12 - 2);
+	/* general */
+	*cs++ = base | BASE_ADDRESS_MODIFY;
+	/*
+	 * surface: the offset must be added to the batch address. OR-ing the
+	 * two only gives the same value when the address shares no set bits
+	 * with the offset, which nothing here guarantees.
+	 */
+	*cs++ = (base + surface_state_base) | BASE_ADDRESS_MODIFY;
+	/* dynamic */
+	*cs++ = base | BASE_ADDRESS_MODIFY;
+	/* indirect */
+	*cs++ = base | BASE_ADDRESS_MODIFY;
+	/* instruction */
+	*cs++ = base | BASE_ADDRESS_MODIFY;
+
+	/* general/dynamic/indirect/instruction access Bound */
+	*cs++ = 0;
+	*cs++ = BASE_ADDRESS_MODIFY;
+	*cs++ = 0;
+	*cs++ = BASE_ADDRESS_MODIFY;
+	*cs++ = 0;
+	*cs++ = 0;
+	batch_advance(batch, cs);
+}
+
+/*
+ * Emit an 8-dword MEDIA_VFE_STATE packet into @batch, taking the thread
+ * and URB entry counts from @bv and the entry sizes and mode from the
+ * caller.
+ */
+static void
+gen7_emit_vfe_state(struct batch_chunk *batch,
+		    const struct batch_vals *bv,
+		    u32 urb_size, u32 curbe_size,
+		    u32 mode)
+{
+	u32 *cs = batch_alloc_items(batch, 32, 8);
+	const u32 threads = bv->max_primitives - 1;
+	const u32 urb_entries = bv->max_urb_entries;
+
+	cs[0] = MEDIA_VFE_STATE | (8 - 2);
+
+	/* scratch buffer */
+	cs[1] = 0;
+
+	/* number of threads & urb entries for GPGPU vs Media Mode */
+	cs[2] = threads << 16 | urb_entries << 8 | mode << 2;
+
+	cs[3] = 0;
+
+	/* urb entry size & curbe size in 256 bits unit */
+	cs[4] = urb_size << 16 | curbe_size;
+
+	/* scoreboard */
+	cs[5] = 0;
+	cs[6] = 0;
+	cs[7] = 0;
+
+	batch_advance(batch, cs + 8);
+}
+
+/*
+ * Emit a 4-dword MEDIA_INTERFACE_DESCRIPTOR_LOAD that covers @count
+ * descriptors of 8 dwords each, found at @interface_descriptor.
+ */
+static void
+gen7_emit_interface_descriptor_load(struct batch_chunk *batch,
+				    const u32 interface_descriptor,
+				    unsigned int count)
+{
+	u32 *cs = batch_alloc_items(batch, 8, 4);
+
+	cs[0] = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2);
+	cs[1] = 0;
+
+	/* total length of the descriptors in bytes */
+	cs[2] = count * 8 * sizeof(*cs);
+
+	/*
+	 * interface descriptor address - it is relative to the dynamics base
+	 * address
+	 */
+	cs[3] = interface_descriptor;
+
+	batch_advance(batch, cs + 4);
+}
+
+/*
+ * Emit one MEDIA_OBJECT packet into @batch: six header dwords followed by
+ * 112 * 8 dwords of inline data. The first inline dword packs an x/y
+ * offset derived from @media_object_index; the third is
+ * GT3_INLINE_DATA_DELAYS; everything else is zero.
+ */
+static void
+gen7_emit_media_object(struct batch_chunk *batch,
+		       unsigned int media_object_index)
+{
+	const unsigned int inline_dwords = 112 * 8;
+	const unsigned int total_dwords = inline_dwords + 6;
+	const unsigned int x = (media_object_index % 16) * 64;
+	const unsigned int y = (media_object_index / 16) * 16;
+	u32 *cs = batch_alloc_items(batch, 8, total_dwords);
+
+	*cs++ = MEDIA_OBJECT | (total_dwords - 2);
+
+	/*
+	 * interface descriptor offset (1 dword), no indirect data (2 dwords)
+	 * and scoreboard (2 dwords) are all zero
+	 */
+	memset32(cs, 0, 5);
+	cs += 5;
+
+	/* inline */
+	*cs++ = (y << 16) | (x);
+	*cs++ = 0;
+	*cs++ = GT3_INLINE_DATA_DELAYS;
+	memset32(cs, 0, inline_dwords - 3);
+	cs += inline_dwords - 3;
+
+	batch_advance(batch, cs);
+}
+
+/*
+ * Emit a 5-dword PIPE_CONTROL carrying the state-cache-invalidate and
+ * global-GTT flags; the remaining dwords are zero.
+ */
+static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
+{
+	u32 *cs = batch_alloc_items(batch, 0, 5);
+
+	cs[0] = GFX_OP_PIPE_CONTROL(5);
+	cs[1] = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+		PIPE_CONTROL_GLOBAL_GTT_IVB;
+	memset32(cs + 2, 0, 3);
+
+	batch_advance(batch, cs + 5);
+}
+
+/*
+ * Build the whole clear batch in the mapping at @start: the state chunk
+ * (kernel, binding table, interface descriptors) is filled first, then the
+ * command chunk that selects the media pipeline, programs the bases and
+ * VFE state, loads the descriptors and issues bv->max_primitives media
+ * objects before ending the batch.
+ */
+static void emit_batch(struct i915_vma * const vma,
+		       u32 *start,
+		       const struct batch_vals *bv)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	const struct cb_kernel *kernel =
+		IS_HASWELL(i915) ? &cb_kernel_hsw : &cb_kernel_ivb;
+	const unsigned int idd_count = 64;
+	const u32 urb_size = 112;
+	struct batch_chunk cmds, state;
+	unsigned int n;
+	u32 idd;
+
+	/* Commands sit at the front of the buffer, state follows them */
+	batch_init(&cmds, vma, start, 0, bv->cmd_size);
+	batch_init(&state, vma, start, bv->state_start, bv->state_size);
+
+	idd = gen7_fill_interface_descriptor(&state, bv, kernel, idd_count);
+
+	gen7_emit_pipeline_flush(&cmds);
+	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+	batch_add(&cmds, MI_NOOP);
+	gen7_emit_state_base_address(&cmds, idd);
+	gen7_emit_pipeline_flush(&cmds);
+
+	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
+
+	gen7_emit_interface_descriptor_load(&cmds, idd, idd_count);
+
+	for (n = 0; n < bv->max_primitives; n++)
+		gen7_emit_media_object(&cmds, n);
+
+	batch_add(&cmds, MI_BATCH_BUFFER_END);
+}
+
+/*
+ * With a NULL @vma, return the number of bytes the clear batch needs on
+ * @engine's platform. Otherwise map @vma's object, zero that many bytes,
+ * write the batch into it and return 0, or the error from mapping.
+ */
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+			    struct i915_vma * const vma)
+{
+	struct batch_vals bv;
+	u32 *map;
+
+	batch_get_defaults(engine->i915, &bv);
+
+	/* Size query only */
+	if (!vma)
+		return bv.max_size;
+
+	GEM_BUG_ON(vma->obj->base.size < bv.max_size);
+
+	map = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	memset(map, 0, bv.max_size);
+	emit_batch(vma, map, &bv);
+
+	i915_gem_object_flush_map(vma->obj);
+	i915_gem_object_unpin_map(vma->obj);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
new file mode 100644
index 000000000000..bb100748e2c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __GEN7_RENDERCLEAR_H__
+#define __GEN7_RENDERCLEAR_H__
+
+struct intel_engine_cs;
+struct i915_vma;
+
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+ struct i915_vma * const vma);
+
+#endif /* __GEN7_RENDERCLEAR_H__ */
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4d1de2d97d5c..699125928272 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -8,6 +8,7 @@
#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
+#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"
@@ -25,6 +26,30 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
return pde;
}
+/*
+ * Build a gen8 PTE for @addr: present, writable unless PTE_READ_ONLY is
+ * set in @flags, and tagged with the PPAT bits selected by @level.
+ */
+static u64 gen8_pte_encode(dma_addr_t addr,
+			   enum i915_cache_level level,
+			   u32 flags)
+{
+	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+	gen8_pte_t ppat;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
+
+	if (level == I915_CACHE_NONE)
+		ppat = PPAT_UNCACHED;
+	else if (level == I915_CACHE_WT)
+		ppat = PPAT_DISPLAY_ELLC;
+	else
+		ppat = PPAT_CACHED;
+
+	return pte | ppat;
+}
+
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -364,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
return err;
}
+static __always_inline void
+write_pte(gen8_pte_t *pte, const gen8_pte_t val) /* store @val at @pte, then clflush that entry between two wmb()s */
+{
+ /* Magic delays? Or can we refine these to flush all in one pass? */
+ *pte = val;
+ wmb(); /* cpu to cache */
+ clflush(pte); /* cache to memory */
+ wmb(); /* visible to all */
+}
+
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
@@ -380,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+ write_pte(&vaddr[gen8_pd_index(idx, 0)],
+ pte_encode | iter->dma);
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -462,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- vaddr[index++] = encode | iter->dma;
+ write_pte(&vaddr[index++], encode | iter->dma);
start += page_size;
iter->dma += page_size;
@@ -706,6 +742,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
ppgtt->vm.clear_range = gen8_ppgtt_clear;
+ ppgtt->vm.pte_encode = gen8_pte_encode;
+
if (intel_vgpu_active(gt->i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
diff --git a/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
new file mode 100644
index 000000000000..b47f9d4a0848
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC
+ */
+
+static const u32 hsw_clear_kernel[] = {
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+ 0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+ 0x00010220, 0x34001c00, 0x00001400, 0x00000160,
+ 0x00600001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+ 0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+ 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+ 0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+ 0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+ 0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+ 0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+ 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+ 0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+ 0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+ 0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+ 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+ 0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+ 0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+ 0x00000001, 0x20840021, 0x00000068, 0x00000000,
+ 0x00000001, 0x20880061, 0x00000000, 0x00000003,
+ 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+ 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+ 0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+ 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0,
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+ 0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+ 0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+ 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+ 0x00200001, 0x20400121, 0x00450020, 0x00000000,
+ 0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+ 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+ 0x00800001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20800061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21000061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21200061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21400061, 0x00000000, 0x00000000,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+ 0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+ 0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+ 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0,
+ 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cbad7fe722ce..d907d538176e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -64,7 +64,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
irq_disable(engine);
- b->irq_armed = false;
+ WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(engine->gt);
}
@@ -73,7 +73,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
- if (!b->irq_armed)
+ if (!READ_ONCE(b->irq_armed))
return;
spin_lock_irqsave(&b->irq_lock, flags);
@@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
intel_engine_add_retire(engine, tl);
}
+/*
+ * Clear the SIGNAL flag on @rq and signal its fence. If
+ * __dma_fence_signal() reports success, take a reference on @rq and queue
+ * it at the tail of @signals.
+ */
+static void __signal_request(struct i915_request *rq, struct list_head *signals)
+{
+	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+	if (__dma_fence_signal(&rq->fence)) {
+		i915_request_get(rq);
+		list_add_tail(&rq->signal_link, signals);
+	}
+}
+
static void signal_irq_work(struct irq_work *work)
{
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
@@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work)
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
+ list_splice_init(&b->signaled_requests, &signal);
+
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
@@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work)
list_entry(pos, typeof(*rq), signal_link);
GEM_BUG_ON(!check_signal_order(ce, rq));
-
if (!__request_completed(rq))
break;
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
- &rq->fence.flags));
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
- if (!__dma_fence_signal(&rq->fence))
- continue;
-
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
- i915_request_get(rq);
- list_add_tail(&rq->signal_link, &signal);
+ __signal_request(rq, &signal);
}
/*
@@ -233,7 +238,7 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- b->irq_armed = true;
+ WRITE_ONCE(b->irq_armed, true);
/*
* Since we are waiting on a request, the GPU should be busy
@@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
+ INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
}
@@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&b->irq_lock, flags);
}
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{ /* Hand every request on ce->signals to the engine's signaled_requests list, then kick the irq_work. */
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->irq_lock, flags);
+ if (!list_empty(&ce->signals)) {
+ struct i915_request *rq, *next;
+
+ /* Queue for executing the signal callbacks in the irq_work */
+ list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
+ GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(!__request_completed(rq)); /* only completed requests may be transferred */
+
+ __signal_request(rq, &b->signaled_requests);
+ }
+
+ INIT_LIST_HEAD(&ce->signals); /* entries were never list_del()'d, so reset the stale head */
+ list_del_init(&ce->signal_link); /* unlink ce; signal_irq_work walks b->signalers via this link */
+
+ irq_work_queue(&b->irq_work);
+ }
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+}
+
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 8bb444cda14f..74ddb49b2941 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce)
return -EINTR;
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ if (intel_context_is_banned(ce)) {
+ err = -EIO;
+ goto unlock;
+ }
+
err = ce->ops->alloc(ce);
if (unlikely(err))
goto unlock;
@@ -92,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce)
{
int err;
+ GEM_BUG_ON(intel_context_is_closed(ce));
+
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
@@ -107,6 +114,11 @@ int __intel_context_do_pin(struct intel_context *ce)
goto out_release;
}
+ if (unlikely(intel_context_is_closed(ce))) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 18efad255124..07be021882cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -173,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
}
+static inline bool intel_context_is_closed(const struct intel_context *ce)
+{ /* True when CONTEXT_CLOSED_BIT is set in ce->flags. */
+ return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+}
+
static inline bool intel_context_use_semaphores(const struct intel_context *ce)
{
return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
new file mode 100644
index 000000000000..65dcd090245d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_active.h"
+#include "intel_context.h"
+#include "intel_context_param.h"
+#include "intel_ring.h"
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz)
+{ /* Set ce's ring size to sz; returns 0 on success or a negative error code. */
+ int err;
+
+ if (intel_context_lock_pinned(ce)) /* any non-zero result is reported as -EINTR */
+ return -EINTR;
+
+ err = i915_active_wait(&ce->active); /* on the non-error path this also leaves err as the final return value */
+ if (err < 0)
+ goto unlock;
+
+ if (intel_context_is_pinned(ce)) {
+ err = -EBUSY; /* In active use, come back later! */
+ goto unlock;
+ }
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ struct intel_ring *ring;
+
+ /* Replace the existing ringbuffer */
+ ring = intel_engine_create_ring(ce->engine, sz);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring); /* the old ring is left in place on failure */
+ goto unlock;
+ }
+
+ intel_ring_put(ce->ring);
+ ce->ring = ring;
+
+ /* Context image will be updated on next pin */
+ } else {
+ ce->ring = __intel_context_ring_size(sz); /* presumably encodes sz in the pointer; intel_context_get_ring_size() reads it back as an integer */
+ }
+
+unlock:
+ intel_context_unlock_pinned(ce);
+ return err;
+}
+
+long intel_context_get_ring_size(struct intel_context *ce)
+{ /* Return ce's ring size, or -EINTR if the pin lock could not be taken. */
+ long sz = (unsigned long)READ_ONCE(ce->ring); /* without CONTEXT_ALLOC_BIT, ce->ring is read as a plain integer */
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ if (intel_context_lock_pinned(ce))
+ return -EINTR;
+
+ sz = ce->ring->size; /* state allocated: ce->ring is dereferenced as a real ring, under the pin lock */
+ intel_context_unlock_pinned(ce);
+ }
+
+ return sz;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h
new file mode 100644
index 000000000000..f053d8633fe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_CONTEXT_PARAM_H
+#define INTEL_CONTEXT_PARAM_H
+
+struct intel_context;
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz);
+long intel_context_get_ring_size(struct intel_context *ce);
+
+#endif /* INTEL_CONTEXT_PARAM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index 57a30956c922..487299cb91f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -25,8 +25,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
return PTR_ERR(cs);
offset = i915_ggtt_offset(ce->state) +
- LRC_STATE_PN * PAGE_SIZE +
- CTX_R_PWR_CLK_STATE * 4;
+ LRC_STATE_OFFSET + CTX_R_PWR_CLK_STATE * 4;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 11278343b9b5..4954b0df4864 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -45,8 +45,8 @@ struct intel_context {
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
-#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2)
+#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2)
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
@@ -62,13 +62,20 @@ struct intel_context {
#define CONTEXT_BARRIER_BIT 0
#define CONTEXT_ALLOC_BIT 1
#define CONTEXT_VALID_BIT 2
-#define CONTEXT_USE_SEMAPHORES 3
-#define CONTEXT_BANNED 4
-#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
-#define CONTEXT_NOPREEMPT 6
+#define CONTEXT_CLOSED_BIT 3
+#define CONTEXT_USE_SEMAPHORES 4
+#define CONTEXT_BANNED 5
+#define CONTEXT_FORCE_SINGLE_SUBMISSION 6
+#define CONTEXT_NOPREEMPT 7
u32 *lrc_reg_state;
- u64 lrc_desc;
+ union {
+ struct {
+ u32 lrca;
+ u32 ccid;
+ };
+ u64 desc;
+ } lrc;
u32 tag; /* cookie passed to HW to track this context on submission */
/* Time on GPU as tracked by the hw. */
@@ -95,6 +102,8 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+
+ u8 wa_bb_page; /* if set, page num reserved for context workarounds */
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 29c8c03c5caa..9bf6d4989968 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -107,7 +107,20 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- return *READ_ONCE(execlists->active);
+ struct i915_request * const *cur, * const *old, *active;
+
+ cur = READ_ONCE(execlists->active);
+ smp_rmb(); /* pairs with overwrite protection in process_csb() */
+ do {
+ old = cur;
+
+ active = READ_ONCE(*cur); /* sample the request slot that cur points at */
+ cur = READ_ONCE(execlists->active); /* re-read the pointer to detect a concurrent update */
+
+ smp_rmb(); /* and complete the seqlock retry */
+ } while (unlikely(cur != old)); /* retry if execlists->active moved while we sampled */
+
+ return active;
}
static inline void
@@ -186,6 +199,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
int intel_engines_init_mmio(struct intel_gt *gt);
int intel_engines_init(struct intel_gt *gt);
+void intel_engine_free_request_pool(struct intel_engine_cs *engine);
+
void intel_engines_release(struct intel_gt *gt);
void intel_engines_free(struct intel_gt *gt);
@@ -223,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce);
+
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6);
- batch[1] = flags;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
+ batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{ /* Wrapper around __gen8_emit_pipe_control() with flags0 fixed to 0 and flags passed as flags1. */
+ return __gen8_emit_pipe_control(batch, 0, flags, offset);
+}
+
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+{ /* Passes both flag words straight through to __gen8_emit_pipe_control(). */
+ return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+}
+
static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -247,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+ *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
+ *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
@@ -258,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
+static inline u32*
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{ /* Wrapper around __gen8_emit_ggtt_write_rcs() with flags0 fixed to 0 and flags passed as flags1. */
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+}
+
+static inline u32*
+gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+{ /* Passes both flag words straight through to __gen8_emit_ggtt_write_rcs(). */
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+}
+
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
@@ -295,9 +335,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-int intel_enable_engine_stats(struct intel_engine_cs *engine);
-void intel_disable_engine_stats(struct intel_engine_cs *engine);
-
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
@@ -320,13 +357,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
- return false;
-
- return intel_engine_has_semaphores(engine);
-}
-
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e46e55354e95..da5b61085257 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -31,7 +31,6 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
@@ -275,6 +274,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
const struct engine_info *info = &intel_engines[id];
+ struct drm_i915_private *i915 = gt->i915;
struct intel_engine_cs *engine;
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
@@ -301,11 +301,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
- engine->i915 = gt->i915;
+ engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
engine->hw_id = engine->guc_id = info->hw_id;
- engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
+ engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
engine->class = info->class;
engine->instance = info->instance;
@@ -313,6 +313,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+ engine->props.max_busywait_duration_ns =
+ CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
engine->props.preempt_timeout_ms =
CONFIG_DRM_I915_PREEMPT_TIMEOUT;
engine->props.stop_timeout_ms =
@@ -320,11 +322,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
+ /* Override to uninterruptible for OpenCL workloads. */
+ if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
+ engine->props.preempt_timeout_ms = 0;
+
+ engine->defaults = engine->props; /* never to change again */
+
engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
if (engine->context_size)
- DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
+ DRIVER_CAPS(i915)->has_logical_contexts = true;
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
@@ -340,8 +348,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- gt->i915->engine[id] = engine;
-
return 0;
}
@@ -418,17 +424,27 @@ void intel_engines_release(struct intel_gt *gt)
engine->release = NULL;
memset(&engine->reset, 0, sizeof(engine->reset));
-
- gt->i915->engine[id] = NULL;
}
}
+void intel_engine_free_request_pool(struct intel_engine_cs *engine)
+{ /* Free the engine's reserved request, if there is one, back to the request slab cache. */
+ if (!engine->request_pool)
+ return;
+
+ kmem_cache_free(i915_request_slab_cache(), engine->request_pool); /* NOTE(review): pointer is not cleared; the visible caller kfree()s the engine right after */
+}
+
void intel_engines_free(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Free the requests! dma-resv keeps fences around for an eternity */
+ rcu_barrier();
+
for_each_engine(engine, gt, id) {
+ intel_engine_free_request_pool(engine);
kfree(engine);
gt->engine[id] = NULL;
}
@@ -616,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
-
/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -639,7 +653,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
struct measure_breadcrumb *frame;
- int dw = -ENOMEM;
+ int dw;
GEM_BUG_ON(!engine->gt->scratch);
@@ -814,12 +828,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
intel_engine_fini_retire(engine);
- intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)
- i915_gem_object_put(engine->default_state);
+ fput(engine->default_state);
if (engine->kernel_context) {
intel_context_unpin(engine->kernel_context);
@@ -1218,6 +1231,49 @@ static void print_request(struct drm_printer *m,
name);
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{ /* Return rq's timeline with an extra kref taken, or NULL if its refcount was already zero. */
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+ * is no control over the submission queue per-se and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref)) /* refcount already zero: report no timeline */
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl; /* a non-NULL result carries a reference (print_ring drops it with intel_timeline_put()) */
+}
+
+static int print_ring(char *buf, int sz, struct i915_request *rq)
+{ /* Format a "ring:{...}, " summary of rq into buf; returns scnprintf()'s result, or 0 if rq is already signaled. */
+ int len = 0;
+
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq); /* may be NULL; hwsp is then printed as 0 */
+
+ len = scnprintf(buf, sz,
+ "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq),
+ DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+ 1000 * 1000)); /* ns -> ms, matching the "ms" suffix in the format */
+
+ if (tl)
+ intel_timeline_put(tl); /* drop the reference taken by get_timeline() */
+ }
+
+ return len;
+}
+
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
const size_t rowsize = 8 * sizeof(u32);
@@ -1247,27 +1303,6 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
-static struct intel_timeline *get_timeline(struct i915_request *rq)
-{
- struct intel_timeline *tl;
-
- /*
- * Even though we are holding the engine->active.lock here, there
- * is no control over the submission queue per-se and we are
- * inspecting the active state at a random point in time, with an
- * unknown queue. Play safe and make sure the timeline remains valid.
- * (Only being used for pretty printing, one extra kref shouldn't
- * cause a camel stampede!)
- */
- rcu_read_lock();
- tl = rcu_dereference(rq->timeline);
- if (!kref_get_unless_zero(&tl->kref))
- tl = NULL;
- rcu_read_unlock();
-
- return tl;
-}
-
static const char *repr_timer(const struct timer_list *t)
{
if (!READ_ONCE(t->expires))
@@ -1288,6 +1323,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+ if (HAS_EXECLISTS(dev_priv)) {
+ drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+ drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+ }
drm_printf(m, "\tRING_START: 0x%08x\n",
ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD: 0x%08x\n",
@@ -1379,40 +1420,25 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
char hdr[160];
int len;
- len = snprintf(hdr, sizeof(hdr),
- "\t\tActive[%d]: ",
- (int)(port - execlists->active));
- if (!i915_request_signaled(rq)) {
- struct intel_timeline *tl = get_timeline(rq);
-
- len += snprintf(hdr + len, sizeof(hdr) - len,
- "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq),
- DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
- 1000 * 1000));
-
- if (tl)
- intel_timeline_put(tl);
- }
- snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ len = scnprintf(hdr, sizeof(hdr),
+ "\t\tActive[%d]: ccid:%08x, ",
+ (int)(port - execlists->active),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+ scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
- struct intel_timeline *tl = get_timeline(rq);
- char hdr[80];
-
- snprintf(hdr, sizeof(hdr),
- "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- (int)(port - execlists->pending),
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
+ char hdr[160];
+ int len;
- if (tl)
- intel_timeline_put(tl);
+ len = scnprintf(hdr, sizeof(hdr),
+ "\t\tPending[%d]: ccid:%08x, ",
+ (int)(port - execlists->pending),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+ scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ print_request(m, rq, hdr);
}
rcu_read_unlock();
execlists_active_unlock_bh(execlists);
@@ -1561,58 +1587,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-/**
- * intel_enable_engine_stats() - Enable engine busy tracking on engine
- * @engine: engine to enable stats collection
- *
- * Start collecting the engine busyness data for @engine.
- *
- * Returns 0 on success or a negative error code.
- */
-int intel_enable_engine_stats(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- unsigned long flags;
- int err = 0;
-
- if (!intel_engine_supports_stats(engine))
- return -ENODEV;
-
- execlists_active_lock_bh(execlists);
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (unlikely(engine->stats.enabled == ~0)) {
- err = -EBUSY;
- goto unlock;
- }
-
- if (engine->stats.enabled++ == 0) {
- struct i915_request * const *port;
- struct i915_request *rq;
-
- engine->stats.enabled_at = ktime_get();
-
- /* XXX submission method oblivious? */
- for (port = execlists->active; (rq = *port); port++)
- engine->stats.active++;
-
- for (port = execlists->pending; (rq = *port); port++) {
- /* Exclude any contexts already counted in active */
- if (!intel_context_inflight_count(rq->context))
- engine->stats.active++;
- }
-
- if (engine->stats.active)
- engine->stats.start = engine->stats.enabled_at;
- }
-
-unlock:
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
- execlists_active_unlock_bh(execlists);
-
- return err;
-}
-
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
ktime_t total = engine->stats.total;
@@ -1621,7 +1595,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
* If the engine is executing something at the moment
* add it to the total.
*/
- if (engine->stats.active)
+ if (atomic_read(&engine->stats.active))
total = ktime_add(total,
ktime_sub(ktime_get(), engine->stats.start));
@@ -1647,28 +1621,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
return total;
}
-/**
- * intel_disable_engine_stats() - Disable engine busy tracking on engine
- * @engine: engine to disable stats collection
- *
- * Stops collecting the engine busyness data for @engine.
- */
-void intel_disable_engine_stats(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (!intel_engine_supports_stats(engine))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- WARN_ON_ONCE(engine->stats.enabled == 0);
- if (--engine->stats.enabled == 0) {
- engine->stats.total = __intel_engine_get_busy_time(engine);
- engine->stats.active = 0;
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
static bool match_ring(struct i915_request *rq)
{
u32 ring = ENGINE_READ(rq->engine, RING_START);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index dd825718e4e5..5136c8bf112d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -31,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
delay = msecs_to_jiffies_timeout(delay);
if (delay >= HZ)
delay = round_jiffies_up_relative(delay);
- schedule_delayed_work(&engine->heartbeat.work, delay);
+ mod_delayed_work(system_wq, &engine->heartbeat.work, delay);
return true;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b6cf284e3a2d..d0a1078ef632 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -10,31 +10,22 @@
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
+#include "shmem_utils.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
struct intel_context *ce;
- void *map;
ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
- /* Pin the default state for fast resets from atomic context. */
- map = NULL;
- if (engine->default_state)
- map = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (!IS_ERR_OR_NULL(map))
- engine->pinned_default_state = map;
-
/* Discard stale context state from across idling */
ce = engine->kernel_context;
if (ce) {
@@ -44,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
int type = i915_coherent_map_type(engine->i915);
+ void *map;
map = i915_gem_object_pin_map(obj, type);
if (!IS_ERR(map)) {
@@ -181,7 +173,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* Ergo, if we put ourselves on the timelines.active_list
* (se intel_timeline_enter()) before we increment the
* engine->wakeref.count, we may see the request completion and retire
- * it causing an undeflow of the engine->wakeref.
+ * it causing an underflow of the engine->wakeref.
*/
flags = __timeline_mark_lock(ce);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
@@ -255,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
- intel_engine_pool_park(&engine->pool);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
@@ -263,11 +254,6 @@ static int __engine_park(struct intel_wakeref *wf)
if (engine->park)
engine->park(engine);
- if (engine->pinned_default_state) {
- i915_gem_object_unpin_map(engine->default_state);
- engine->pinned_default_state = NULL;
- }
-
engine->execlists.no_priolist = false;
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index e52c2b0cb245..418df0a13145 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -37,6 +37,12 @@ static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
intel_wakeref_put_async(&engine->wakeref);
}
+static inline void intel_engine_pm_put_delay(struct intel_engine_cs *engine,
+ unsigned long delay)
+{ /* Forward to intel_wakeref_put_delay() on the engine's wakeref, passing delay through unchanged. */
+ intel_wakeref_put_delay(&engine->wakeref, delay);
+}
+
static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
{
intel_wakeref_unlock_wait(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
deleted file mode 100644
index 1bd89cadc3b7..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef INTEL_ENGINE_POOL_H
-#define INTEL_ENGINE_POOL_H
-
-#include "intel_engine_pool_types.h"
-#include "i915_active.h"
-#include "i915_request.h"
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
-
-static inline int
-intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
- struct i915_request *rq)
-{
- return i915_active_add_request(&node->active, rq);
-}
-
-static inline void
-intel_engine_pool_put(struct intel_engine_pool_node *node)
-{
- i915_active_release(&node->active);
-}
-
-void intel_engine_pool_init(struct intel_engine_pool *pool);
-void intel_engine_pool_park(struct intel_engine_pool *pool);
-void intel_engine_pool_fini(struct intel_engine_pool *pool);
-
-#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index b23366a81048..2b6cdf47d428 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,7 +22,6 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
@@ -157,6 +156,20 @@ struct intel_engine_execlists {
struct i915_priolist default_priolist;
/**
+ * @ccid: identifier for contexts submitted to this engine
+ */
+ u32 ccid;
+
+ /**
+ * @yield: CCID at the time of the last semaphore-wait interrupt.
+ *
+ * Instead of leaving a semaphore busy-spinning on an engine, we would
+ * like to switch to another ready context, i.e. yielding the semaphore
+ * timeslice.
+ */
+ u32 yield;
+
+ /**
* @error_interrupt: CS Master EIR
*
* The CS generates an interrupt when it detects an error. We capture
@@ -167,6 +180,11 @@ struct intel_engine_execlists {
u32 error_interrupt;
/**
+ * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
+ */
+ u32 reset_ccid;
+
+ /**
* @no_priolist: priority lists disabled
*/
bool no_priolist;
@@ -295,8 +313,7 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
- unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+ unsigned long context_tag;
struct rb_node uabi_node;
@@ -308,6 +325,9 @@ struct intel_engine_cs {
struct list_head hold; /* ready requests, but on hold */
} active;
+ /* keep a request in reserve for a [pm] barrier under oom */
+ struct i915_request *request_pool;
+
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
@@ -323,8 +343,7 @@ struct intel_engine_cs {
unsigned long wakeref_serial;
struct intel_wakeref wakeref;
- struct drm_i915_gem_object *default_state;
- void *pinned_default_state;
+ struct file *default_state;
struct {
struct intel_ring *ring;
@@ -358,6 +377,8 @@ struct intel_engine_cs {
spinlock_t irq_lock;
struct list_head signalers;
+ struct list_head signaled_requests;
+
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
@@ -389,13 +410,6 @@ struct intel_engine_cs {
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
} pmu;
- /*
- * A pool of objects to use as shadow copies of client batch buffers
- * when the command parser is enabled. Prevents the client from
- * modifying the batch contents after software parsing.
- */
- struct intel_engine_pool pool;
-
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_wa_list ctx_wa_list;
@@ -407,6 +421,7 @@ struct intel_engine_cs {
void (*irq_enable)(struct intel_engine_cs *engine);
void (*irq_disable)(struct intel_engine_cs *engine);
+ void (*sanitize)(struct intel_engine_cs *engine);
int (*resume)(struct intel_engine_cs *engine);
struct {
@@ -483,10 +498,11 @@ struct intel_engine_cs {
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
unsigned int flags;
/*
@@ -515,42 +531,43 @@ struct intel_engine_cs {
struct {
/**
- * @lock: Lock protecting the below fields.
- */
- seqlock_t lock;
- /**
- * @enabled: Reference count indicating number of listeners.
+ * @active: Number of contexts currently scheduled in.
*/
- unsigned int enabled;
+ atomic_t active;
+
/**
- * @active: Number of contexts currently scheduled in.
+ * @lock: Lock protecting the below fields.
*/
- unsigned int active;
+ seqlock_t lock;
+
/**
- * @enabled_at: Timestamp when busy stats were enabled.
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where engine is currently busy (active > 0).
*/
- ktime_t enabled_at;
+ ktime_t total;
+
/**
* @start: Timestamp of the last idle to active transition.
*
* Idle is defined as active == 0, active is active > 0.
*/
ktime_t start;
+
/**
- * @total: Total time this engine was busy.
- *
- * Accumulated time not counting the most recent block in cases
- * where engine is currently busy (active > 0).
+ * @rps: Utilisation at last RPS sampling.
*/
- ktime_t total;
+ ktime_t rps;
} stats;
struct {
unsigned long heartbeat_interval_ms;
+ unsigned long max_busywait_duration_ns;
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
- } props;
+ } props, defaults;
};
static inline bool
@@ -584,6 +601,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
}
static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 7dae91e0d002..66165b10256e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,8 @@
#include <asm/set_memory.h>
#include <asm/smp.h>
+#include <drm/i915_drm.h>
+
#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
@@ -63,7 +65,7 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
ggtt->mappable_end);
}
- i915_ggtt_init_fences(ggtt);
+ intel_ggtt_init_fences(ggtt);
return 0;
}
@@ -157,6 +159,13 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_gtt_chipset_flush();
}
+static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ return addr | _PAGE_PRESENT;
+}
+
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
writeq(pte, addr);
@@ -172,7 +181,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
- gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+ gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
ggtt->invalidate(ggtt);
}
@@ -182,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
enum i915_cache_level level,
u32 flags)
{
+ const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- struct sgt_iter sgt_iter;
- gen8_pte_t __iomem *gtt_entries;
- const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+ gen8_pte_t __iomem *gte;
+ gen8_pte_t __iomem *end;
+ struct sgt_iter iter;
dma_addr_t addr;
/*
@@ -193,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
* not to allow the user to override access to a read only page.
*/
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
- gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
- for_each_sgt_daddr(addr, sgt_iter, vma->pages)
- gen8_set_pte(gtt_entries++, pte_encode | addr);
+ gte = (gen8_pte_t __iomem *)ggtt->gsm;
+ gte += vma->node.start / I915_GTT_PAGE_SIZE;
+ end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+
+ for_each_sgt_daddr(addr, iter, vma->pages)
+ gen8_set_pte(gte++, pte_encode | addr);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ gen8_set_pte(gte++, vm->scratch[0].encode);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -232,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
- unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+ gen6_pte_t __iomem *gte;
+ gen6_pte_t __iomem *end;
struct sgt_iter iter;
dma_addr_t addr;
+ gte = (gen6_pte_t __iomem *)ggtt->gsm;
+ gte += vma->node.start / I915_GTT_PAGE_SIZE;
+ end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+
for_each_sgt_daddr(addr, iter, vma->pages)
- iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+ iowrite32(vm->pte_encode(addr, level, flags), gte++);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ iowrite32(vm->scratch[0].encode, gte++);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -427,7 +453,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
u64 size;
int ret;
- if (!USES_GUC(ggtt->vm.i915))
+ if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
@@ -689,11 +715,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
*/
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
+ struct i915_ggtt *ggtt = &i915->ggtt;
struct pagevec *pvec;
- fini_aliasing_ppgtt(&i915->ggtt);
+ fini_aliasing_ppgtt(ggtt);
- ggtt_cleanup_hw(&i915->ggtt);
+ intel_ggtt_fini_fences(ggtt);
+ ggtt_cleanup_hw(ggtt);
pvec = &i915->mm.wc_stash.pvec;
if (pvec->nr) {
@@ -754,17 +782,17 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
* readback check when writing GTT PTE entries.
*/
if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
- ggtt->gsm = ioremap_nocache(phys_addr, size);
+ ggtt->gsm = ioremap(phys_addr, size);
else
ggtt->gsm = ioremap_wc(phys_addr, size);
if (!ggtt->gsm) {
- DRM_ERROR("Failed to map the ggtt page table\n");
+ drm_err(&i915->drm, "Failed to map the ggtt page table\n");
return -ENOMEM;
}
ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
if (ret) {
- DRM_ERROR("Scratch setup failed\n");
+ drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
iounmap(ggtt->gsm);
return ret;
@@ -812,7 +840,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
/* TODO: We're not aware of mappable constraints on gen8 yet */
if (!IS_DGFX(i915)) {
@@ -820,12 +847,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
if (IS_CHERRYVIEW(i915))
size = chv_get_total_gtt_size(snb_gmch_ctl);
@@ -857,7 +878,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
ggtt->vm.vma_ops.clear_pages = clear_pages;
- ggtt->vm.pte_encode = gen8_pte_encode;
+ ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
setup_private_pat(ggtt->vm.gt->uncore);
@@ -961,7 +982,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
ggtt->gmadr = pci_resource(pdev, 2);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
@@ -971,15 +991,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
* just a coarse sanity check.
*/
if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
- DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+ drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+ &ggtt->mappable_end);
return -ENXIO;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
size = gen6_get_total_gtt_size(snb_gmch_ctl);
@@ -1026,7 +1042,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
if (!ret) {
- DRM_ERROR("failed to set up gmch\n");
+ drm_err(&i915->drm, "failed to set up gmch\n");
return -EIO;
}
@@ -1049,7 +1065,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"Applying Ironlake quirks for intel_iommu\n");
return 0;
@@ -1074,26 +1090,29 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
return ret;
if ((ggtt->vm.total - 1) >> 32) {
- DRM_ERROR("We never expected a Global GTT with more than 32bits"
- " of address space! Found %lldM!\n",
- ggtt->vm.total >> 20);
+ drm_err(&i915->drm,
+ "We never expected a Global GTT with more than 32bits"
+ " of address space! Found %lldM!\n",
+ ggtt->vm.total >> 20);
ggtt->vm.total = 1ULL << 32;
ggtt->mappable_end =
min_t(u64, ggtt->mappable_end, ggtt->vm.total);
}
if (ggtt->mappable_end > ggtt->vm.total) {
- DRM_ERROR("mappable aperture extends past end of GGTT,"
- " aperture=%pa, total=%llx\n",
- &ggtt->mappable_end, ggtt->vm.total);
+ drm_err(&i915->drm,
+ "mappable aperture extends past end of GGTT,"
+ " aperture=%pa, total=%llx\n",
+ &ggtt->mappable_end, ggtt->vm.total);
ggtt->mappable_end = ggtt->vm.total;
}
/* GMADR is the PCI mmio aperture into the global GTT. */
- DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
- DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
- DRM_DEBUG_DRIVER("DSM size = %lluM\n",
- (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+ drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
+ drm_dbg(&i915->drm, "GMADR size = %lluM\n",
+ (u64)ggtt->mappable_end >> 20);
+ drm_dbg(&i915->drm, "DSM size = %lluM\n",
+ (u64)resource_size(&intel_graphics_stolen_res) >> 20);
return 0;
}
@@ -1111,7 +1130,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return ret;
if (intel_vtd_active())
- dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+ drm_info(&i915->drm, "VT-d active for gfx access\n");
return 0;
}
@@ -1186,6 +1205,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (INTEL_GEN(ggtt->vm.i915) >= 8)
setup_private_pat(ggtt->vm.gt->uncore);
+
+ intel_ggtt_restore_fences(ggtt);
}
static struct scatterlist *
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
new file mode 100644
index 000000000000..7fb36b12fe7a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -0,0 +1,909 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_pvinfo.h"
+#include "i915_vgpu.h"
+
+/**
+ * DOC: fence register handling
+ *
+ * Important to avoid confusion: "fences" in the i915 driver are not execution
+ * fences used to track command completion but hardware detiler objects which
+ * wrap a given range of the global GTT. Each platform has only a fairly limited
+ * set of these objects.
+ *
+ * Fences are used to detile GTT memory mappings. They're also connected to the
+ * hardware frontbuffer render tracking and hence interact with frontbuffer
+ * compression. Furthermore on older platforms fences are required for tiled
+ * objects used by the display engine. They can also be used by the render
+ * engine - they're required for blitter commands and are optional for render
+ * commands. But on gen4+ both display (with the exception of fbc) and rendering
+ * have their own tiling state bits and don't need fences.
+ *
+ * Also note that fences only support X and Y tiling and hence can't be used for
+ * the fancier new tiling formats like W, Ys and Yf.
+ *
+ * Finally note that because fences are such a restricted resource they're
+ * dynamically associated with objects. Furthermore fence state is committed to
+ * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
+ * explicitly call i915_vma_pin_fence() to synchronize fencing status
+ * for cpu access. Also note that some code wants an unfenced view, for those
+ * cases the fence can be removed forcefully with i915_vma_revoke_fence().
+ *
+ * Internally these functions will synchronize with userspace access by removing
+ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
+ */
+
+#define pipelined 0
+
+static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.i915;
+}
+
+static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.gt->uncore;
+}
+
+static void i965_write_fence_reg(struct i915_fence_reg *fence)
+{
+ i915_reg_t fence_reg_lo, fence_reg_hi;
+ int fence_pitch_shift;
+ u64 val;
+
+ if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
+ fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+ fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
+ fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
+ } else {
+ fence_reg_lo = FENCE_REG_965_LO(fence->id);
+ fence_reg_hi = FENCE_REG_965_HI(fence->id);
+ fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+ }
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+
+ GEM_BUG_ON(!IS_ALIGNED(stride, 128));
+
+ val = fence->start + fence->size - I965_FENCE_PAGE;
+ val <<= 32;
+ val |= fence->start;
+ val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
+ if (fence->tiling == I915_TILING_Y)
+ val |= BIT(I965_FENCE_TILING_Y_SHIFT);
+ val |= I965_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+
+ /*
+ * To w/a incoherency with non-atomic 64-bit register updates,
+ * we split the 64-bit update into two 32-bit writes. In order
+ * for a partial fence not to be evaluated between writes, we
+ * precede the update with write to turn off the fence register,
+ * and only enable the fence as the last step.
+ *
+ * For extra levels of paranoia, we make sure each step lands
+ * before applying the next step.
+ */
+ intel_uncore_write_fw(uncore, fence_reg_lo, 0);
+ intel_uncore_posting_read_fw(uncore, fence_reg_lo);
+
+ intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
+ intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
+ intel_uncore_posting_read_fw(uncore, fence_reg_lo);
+ }
+}
+
+static void i915_write_fence_reg(struct i915_fence_reg *fence)
+{
+ u32 val;
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+ unsigned int tiling = fence->tiling;
+ bool is_y_tiled = tiling == I915_TILING_Y;
+
+ if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
+ stride /= 128;
+ else
+ stride /= 512;
+ GEM_BUG_ON(!is_power_of_2(stride));
+
+ val = fence->start;
+ if (is_y_tiled)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I915_FENCE_SIZE_BITS(fence->size);
+ val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;
+
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ intel_uncore_write_fw(uncore, reg, val);
+ intel_uncore_posting_read_fw(uncore, reg);
+ }
+}
+
+static void i830_write_fence_reg(struct i915_fence_reg *fence)
+{
+ u32 val;
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+
+ val = fence->start;
+ if (fence->tiling == I915_TILING_Y)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I830_FENCE_SIZE_BITS(fence->size);
+ val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ intel_uncore_write_fw(uncore, reg, val);
+ intel_uncore_posting_read_fw(uncore, reg);
+ }
+}
+
+static void fence_write(struct i915_fence_reg *fence)
+{
+ struct drm_i915_private *i915 = fence_to_i915(fence);
+
+ /*
+ * Previous access through the fence register is marshalled by
+ * the mb() inside the fault handlers (i915_gem_release_mmaps)
+ * and explicitly managed for internal users.
+ */
+
+ if (IS_GEN(i915, 2))
+ i830_write_fence_reg(fence);
+ else if (IS_GEN(i915, 3))
+ i915_write_fence_reg(fence);
+ else
+ i965_write_fence_reg(fence);
+
+ /*
+ * Access through the fenced region afterwards is
+ * ordered by the posting reads whilst writing the registers.
+ */
+}
+
+static bool gpu_uses_fence_registers(struct i915_fence_reg *fence)
+{
+ return INTEL_GEN(fence_to_i915(fence)) < 4;
+}
+
+static int fence_update(struct i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ struct i915_ggtt *ggtt = fence->ggtt;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ intel_wakeref_t wakeref;
+ struct i915_vma *old;
+ int ret;
+
+ fence->tiling = 0;
+ if (vma) {
+ GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) ||
+ !i915_gem_object_get_tiling(vma->obj));
+
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return -EINVAL;
+
+ if (gpu_uses_fence_registers(fence)) {
+ /* implicit 'unfenced' GPU blits */
+ ret = i915_vma_sync(vma);
+ if (ret)
+ return ret;
+ }
+
+ fence->start = vma->node.start;
+ fence->size = vma->fence_size;
+ fence->stride = i915_gem_object_get_stride(vma->obj);
+ fence->tiling = i915_gem_object_get_tiling(vma->obj);
+ }
+ WRITE_ONCE(fence->dirty, false);
+
+ old = xchg(&fence->vma, NULL);
+ if (old) {
+ /* XXX Ideally we would move the waiting to outside the mutex */
+ ret = i915_active_wait(&fence->active);
+ if (ret) {
+ fence->vma = old;
+ return ret;
+ }
+
+ i915_vma_flush_writes(old);
+
+ /*
+ * Ensure that all userspace CPU access is completed before
+ * stealing the fence.
+ */
+ if (old != vma) {
+ GEM_BUG_ON(old->fence != fence);
+ i915_vma_revoke_mmap(old);
+ old->fence = NULL;
+ }
+
+ list_move(&fence->link, &ggtt->fence_list);
+ }
+
+ /*
+ * We only need to update the register itself if the device is awake.
+ * If the device is currently powered down, we will defer the write
+ * to the runtime resume, see intel_ggtt_restore_fences().
+ *
+ * This only works for removing the fence register, on acquisition
+ * the caller must hold the rpm wakeref. The fence register must
+ * be cleared before we can use any other fences to ensure that
+ * the new fences do not overlap the elided clears, confusing HW.
+ */
+ wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
+ if (!wakeref) {
+ GEM_BUG_ON(vma);
+ return 0;
+ }
+
+ WRITE_ONCE(fence->vma, vma);
+ fence_write(fence);
+
+ if (vma) {
+ vma->fence = fence;
+ list_move_tail(&fence->link, &ggtt->fence_list);
+ }
+
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+ return 0;
+}
+
+/**
+ * i915_vma_revoke_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
+ *
+ * This function force-removes any fence from the given object, which is useful
+ * if the kernel wants to do untiled GTT access.
+ */
+void i915_vma_revoke_fence(struct i915_vma *vma)
+{
+ struct i915_fence_reg *fence = vma->fence;
+ intel_wakeref_t wakeref;
+
+ lockdep_assert_held(&vma->vm->mutex);
+ if (!fence)
+ return;
+
+ GEM_BUG_ON(fence->vma != vma);
+ GEM_BUG_ON(!i915_active_is_idle(&fence->active));
+ GEM_BUG_ON(atomic_read(&fence->pin_count));
+
+ fence->tiling = 0;
+ WRITE_ONCE(fence->vma, NULL);
+ vma->fence = NULL;
+
+ with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref)
+ fence_write(fence);
+}
+
+static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
+{
+ struct i915_fence_reg *fence;
+
+ list_for_each_entry(fence, &ggtt->fence_list, link) {
+ GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
+ if (atomic_read(&fence->pin_count))
+ continue;
+
+ return fence;
+ }
+
+ /* Wait for completion of pending flips which consume fences */
+ if (intel_has_pending_fb_unpin(ggtt->vm.i915))
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-EDEADLK);
+}
+
+int __i915_vma_pin_fence(struct i915_vma *vma)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+ struct i915_fence_reg *fence;
+ struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+ int err;
+
+ lockdep_assert_held(&vma->vm->mutex);
+
+ /* Just update our place in the LRU if our fence is getting reused. */
+ if (vma->fence) {
+ fence = vma->fence;
+ GEM_BUG_ON(fence->vma != vma);
+ atomic_inc(&fence->pin_count);
+ if (!fence->dirty) {
+ list_move_tail(&fence->link, &ggtt->fence_list);
+ return 0;
+ }
+ } else if (set) {
+ fence = fence_find(ggtt);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ GEM_BUG_ON(atomic_read(&fence->pin_count));
+ atomic_inc(&fence->pin_count);
+ } else {
+ return 0;
+ }
+
+ err = fence_update(fence, set);
+ if (err)
+ goto out_unpin;
+
+ GEM_BUG_ON(fence->vma != set);
+ GEM_BUG_ON(vma->fence != (set ? fence : NULL));
+
+ if (set)
+ return 0;
+
+out_unpin:
+ atomic_dec(&fence->pin_count);
+ return err;
+}
+
+/**
+ * i915_vma_pin_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ * This function walks the fence regs looking for a free one for @vma,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ *
+ * For an untiled surface, this removes any existing fence.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int i915_vma_pin_fence(struct i915_vma *vma)
+{
+ int err;
+
+ if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
+ return 0;
+
+ /*
+ * Note that we revoke fences on runtime suspend. Therefore the user
+ * must keep the device awake whilst using the fence.
+ */
+ assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
+ err = mutex_lock_interruptible(&vma->vm->mutex);
+ if (err)
+ return err;
+
+ err = __i915_vma_pin_fence(vma);
+ mutex_unlock(&vma->vm->mutex);
+
+ return err;
+}
+
+/**
+ * i915_reserve_fence - Reserve a fence for vGPU
+ * @ggtt: Global GTT
+ *
+ * This function walks the fence regs looking for a free one and removes
+ * it from the fence_list. It is used to reserve a fence for vGPU to use.
+ */
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
+{
+ struct i915_fence_reg *fence;
+ int count;
+ int ret;
+
+ lockdep_assert_held(&ggtt->vm.mutex);
+
+ /* Keep at least one fence available for the display engine. */
+ count = 0;
+ list_for_each_entry(fence, &ggtt->fence_list, link)
+ count += !atomic_read(&fence->pin_count);
+ if (count <= 1)
+ return ERR_PTR(-ENOSPC);
+
+ fence = fence_find(ggtt);
+ if (IS_ERR(fence))
+ return fence;
+
+ if (fence->vma) {
+ /* Force-remove fence from VMA */
+ ret = fence_update(fence, NULL);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ list_del(&fence->link);
+
+ return fence;
+}
+
+/**
+ * i915_unreserve_fence - Reclaim a reserved fence
+ * @fence: the fence reg
+ *
+ * This function adds a reserved fence register from vGPU back to the fence_list.
+ */
+void i915_unreserve_fence(struct i915_fence_reg *fence)
+{
+ struct i915_ggtt *ggtt = fence->ggtt;
+
+ lockdep_assert_held(&ggtt->vm.mutex);
+
+ list_add(&fence->link, &ggtt->fence_list);
+}
+
+/**
+ * intel_ggtt_restore_fences - restore fence state
+ * @ggtt: Global GTT
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
+ */
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt)
+{
+ int i;
+
+ for (i = 0; i < ggtt->num_fences; i++)
+ fence_write(&ggtt->fence_regs[i]);
+}
+
+/**
+ * DOC: tiling swizzling details
+ *
+ * The idea behind tiling is to increase cache hit rates by rearranging
+ * pixel data so that a group of pixel accesses are in the same cacheline.
+ * Performance improvements from doing this on the back/depth buffer are on
+ * the order of 30%.
+ *
+ * Intel architectures make this somewhat more complicated, though, by
+ * adjustments made to addressing of data when the memory is in interleaved
+ * mode (matched pairs of DIMMS) to improve memory bandwidth.
+ * For interleaved memory, the CPU sends every sequential 64 bytes
+ * to an alternate memory channel so it can get the bandwidth from both.
+ *
+ * The GPU also rearranges its accesses for increased bandwidth to interleaved
+ * memory, and it matches what the CPU does for non-tiled. However, when tiled
+ * it does it a little differently, since one walks addresses not just in the
+ * X direction but also Y. So, along with alternating channels when bit
+ * 6 of the address flips, it also alternates when other bits flip -- Bits 9
+ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
+ * are common to both the 915 and 965-class hardware.
+ *
+ * The CPU also sometimes XORs in higher bits as well, to improve
+ * bandwidth doing strided access like we do so frequently in graphics. This
+ * is called "Channel XOR Randomization" in the MCH documentation. The result
+ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
+ * decode.
+ *
+ * All of this bit 6 XORing has an effect on our memory management,
+ * as we need to make sure that the 3d driver can correctly address object
+ * contents.
+ *
+ * If we don't have interleaved memory, all tiling is safe and no swizzling is
+ * required.
+ *
+ * When bit 17 is XORed in, we simply refuse to tile at all. Bit
+ * 17 is not just a page offset, so as we page an object out and back in,
+ * individual pages in it will have different bit 17 addresses, resulting in
+ * each 64 bytes being swapped with its neighbor!
+ *
+ * Otherwise, if interleaved, we have to tell the 3d driver what the address
+ * swizzling it needs to do is, since it's writing with the CPU to the pages
+ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
+ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
+ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
+ * to match what the GPU expects.
+ */
+
+/**
+ * detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @ggtt: Global GTT
+ *
+ * Detects bit 6 swizzling of address lookup between IGD access and CPU
+ * access through main memory.
+ */
+static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+
+ if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
+ /*
+ * On BDW+, swizzling is not used. We leave the CPU memory
+ * controller in charge of optimizing memory accesses without
+ * the extra address manipulation GPU side.
+ *
+ * VLV and CHV don't have GPU swizzling.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (INTEL_GEN(i915) >= 6) {
+ if (i915->preserve_bios_swizzle) {
+ if (intel_uncore_read(uncore, DISP_ARB_CTL) &
+ DISP_TILE_SURFACE_SWIZZLING) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ } else {
+ u32 dimm_c0, dimm_c1;
+ dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
+ dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
+ dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ /*
+ * Enable swizzling when the channels are populated
+ * with identically sized dimms. We don't need to check
+ * the 3rd channel because no cpu with gpu attached
+ * ships in that configuration. Also, swizzling only
+ * makes sense for 2 channels anyway.
+ */
+ if (dimm_c0 == dimm_c1) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ }
+ } else if (IS_GEN(i915, 5)) {
+ /*
+ * On Ironlake, whatever the DRAM config, the GPU always does
+ * the same swizzling setup.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if (IS_GEN(i915, 2)) {
+ /*
+ * As far as we know, the 865 doesn't have these bit 6
+ * swizzling issues.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
+ /*
+ * The 965, G33, and newer, have a very flexible memory
+ * configuration. It will enable dual-channel mode
+ * (interleaving) on as much memory as it can, and the GPU
+ * will additionally sometimes enable different bit 6
+ * swizzling for tiled objects from the CPU.
+ *
+ * Here's what I found on the G965:
+ * slot fill memory size swizzling
+ * 0A 0B 1A 1B 1-ch 2-ch
+ * 512 0 0 0 512 0 O
+ * 512 0 512 0 16 1008 X
+ * 512 0 0 512 16 1008 X
+ * 0 512 0 512 16 1008 X
+ * 1024 1024 1024 0 2048 1024 O
+ *
+ * We could probably detect this based on either the DRB
+ * matching, which was the case for the swizzling required in
+ * the table above, or from the 1-ch value being less than
+ * the minimum size of a rank.
+ *
+ * Reports indicate that the swizzling actually
+ * varies depending upon page placement inside the
+ * channels, i.e. we see swizzled pages where the
+ * banks of memory are paired and unswizzled on the
+ * uneven portion, so leave that as unknown.
+ */
+ if (intel_uncore_read(uncore, C0DRB3) ==
+ intel_uncore_read(uncore, C1DRB3)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ }
+ } else {
+ u32 dcc = intel_uncore_read(uncore, DCC);
+
+ /*
+ * On 9xx chipsets, channel interleave by the CPU is
+ * determined by DCC. For single-channel, neither the CPU
+ * nor the GPU do swizzling. For dual channel interleaved,
+ * the GPU's interleave is bit 9 and 10 for X tiled, and bit
+ * 9 for Y tiled. The CPU's interleave is independent, and
+ * can be based on either bit 11 (haven't seen this yet) or
+ * bit 17 (common).
+ */
+ switch (dcc & DCC_ADDRESSING_MODE_MASK) {
+ case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ break;
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
+ if (dcc & DCC_CHANNEL_XOR_DISABLE) {
+ /*
+ * This is the base swizzling by the GPU for
+ * tiled buffers.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
+ /* Bit 11 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_11;
+ } else {
+ /* Bit 17 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_17;
+ }
+ break;
+ }
+
+ /* check for L-shaped memory aka modified enhanced addressing */
+ if (IS_GEN(i915, 4) &&
+ !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+
+ if (dcc == 0xffffffff) {
+ drm_err(&i915->drm, "Couldn't read from MCHBAR. "
+ "Disabling tiling.\n");
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+ }
+
+ if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
+ swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
+ /*
+ * Userspace likes to explode if it sees unknown swizzling,
+ * so lie. We will finish the lie when reporting through
+ * the get-tiling-ioctl by reporting the physical swizzle
+ * mode as unknown instead.
+ *
+ * As we don't strictly know what the swizzling is, it may be
+ * bit17 dependent, and so we need to also prevent the pages
+ * from being moved.
+ */
+ i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+
+ i915->ggtt.bit_6_swizzle_x = swizzle_x;
+ i915->ggtt.bit_6_swizzle_y = swizzle_y;
+}
+
+/*
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ *
+ * The page is mapped with kmap() for the duration of the swap. Within each
+ * 128-byte span the first and second 64-byte halves exchange places, using
+ * a 64-byte stack buffer as scratch space.
+ */
+static void swizzle_page(struct page *page)
+{
+ char temp[64]; /* scratch copy of one 64-byte half */
+ char *vaddr;
+ int i;
+
+ vaddr = kmap(page);
+
+ for (i = 0; i < PAGE_SIZE; i += 128) { /* one pair of halves per step */
+ memcpy(temp, &vaddr[i], 64);
+ memcpy(&vaddr[i], &vaddr[i + 64], 64);
+ memcpy(&vaddr[i + 64], temp, 64);
+ }
+
+ kunmap(page);
+}
+
+/**
+ * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function fixes up the swizzling in case any page frame number for this
+ * object has changed in bit 17 since that state has been saved with
+ * i915_gem_object_save_bit_17_swizzle().
+ *
+ * This is called when pinning backing storage again, since the kernel is free
+ * to move unpinned backing storage around (either by directly moving pages or
+ * by swapping them out and back in again).
+ *
+ * Nothing is done when no bit 17 record exists for @obj. Otherwise each page
+ * in @pages is compared, in iteration order, against the recorded bit at the
+ * same index; a page whose bit 17 differs has its contents swizzled in place
+ * and is marked dirty.
+ */
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL)
+ return;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, pages) {
+ char new_bit_17 = page_to_phys(page) >> 17; /* bit 17 lands in bit 0 */
+ if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
+ swizzle_page(page);
+ set_page_dirty(page);
+ }
+ i++;
+ }
+}
+
+/**
+ * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function saves the bit 17 of each page frame number so that swizzling
+ * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
+ * be called before the backing storage can be unpinned.
+ *
+ * The record is a bitmap with one bit per page of @obj, allocated on first
+ * use. If that allocation fails an error is logged and nothing is saved.
+ */
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL) {
+ obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
+ if (obj->bit_17 == NULL) {
+ DRM_ERROR("Failed to allocate memory for bit 17 "
+ "record\n");
+ return;
+ }
+ }
+
+ i = 0;
+
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (page_to_phys(page) & (1 << 17)) /* mirror bit 17 into bit i */
+ __set_bit(i, obj->bit_17);
+ else
+ __clear_bit(i, obj->bit_17);
+ i++;
+ }
+}
+
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt) /* set up the fence register state of @ggtt */
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ int num_fences;
+ int i;
+
+ INIT_LIST_HEAD(&ggtt->fence_list);
+ INIT_LIST_HEAD(&ggtt->userfault_list);
+ intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);
+
+ detect_bit_6_swizzle(ggtt); /* fills in the bit 6 swizzle modes */
+
+ if (!i915_ggtt_has_aperture(ggtt)) /* pick the fence count for this platform */
+ num_fences = 0;
+ else if (INTEL_GEN(i915) >= 7 &&
+ !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
+ num_fences = 32;
+ else if (INTEL_GEN(i915) >= 4 ||
+ IS_I945G(i915) || IS_I945GM(i915) ||
+ IS_G33(i915) || IS_PINEVIEW(i915))
+ num_fences = 16;
+ else
+ num_fences = 8;
+
+ if (intel_vgpu_active(i915)) /* a vGPU takes its count from avail_rs.fence_num instead */
+ num_fences = intel_uncore_read(uncore,
+ vgtif_reg(avail_rs.fence_num));
+ ggtt->fence_regs = kcalloc(num_fences,
+ sizeof(*ggtt->fence_regs),
+ GFP_KERNEL);
+ if (!ggtt->fence_regs) /* allocation failed: carry on with no fences */
+ num_fences = 0;
+
+ /*
+ * Initialize fence registers to zero: the array comes zeroed from
+ * kcalloc(); each entry then gets its tracker, owner and index set
+ * and is appended to the fence list in index order.
+ */
+ for (i = 0; i < num_fences; i++) {
+ struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+ i915_active_init(&fence->active, NULL, NULL);
+ fence->ggtt = ggtt;
+ fence->id = i;
+ list_add_tail(&fence->link, &ggtt->fence_list);
+ }
+ ggtt->num_fences = num_fences;
+
+ intel_ggtt_restore_fences(ggtt);
+}
+
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt) /* release per-fence trackers and the fence array */
+{
+ int i;
+
+ for (i = 0; i < ggtt->num_fences; i++) { /* one i915_active per fence register */
+ struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+ i915_active_fini(&fence->active);
+ }
+
+ kfree(ggtt->fence_regs);
+}
+
+void intel_gt_init_swizzling(struct intel_gt *gt) /* set the hardware swizzle-enable bits */
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+
+ if (INTEL_GEN(i915) < 5 || /* skipped before gen5 or when X swizzle is NONE */
+ i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+ return;
+
+ intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
+
+ if (IS_GEN(i915, 5)) /* gen5 only gets the DISP_ARB_CTL bit set above */
+ return;
+
+ intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL);
+
+ if (IS_GEN(i915, 6)) /* last bit lives in a per-generation register */
+ intel_uncore_write(uncore,
+ ARB_MODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
+ else if (IS_GEN(i915, 7))
+ intel_uncore_write(uncore,
+ ARB_MODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+ else if (IS_GEN(i915, 8))
+ intel_uncore_write(uncore,
+ GAMTARBMODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
+ else
+ MISSING_CASE(INTEL_GEN(i915)); /* no register handled here for other generations */
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
new file mode 100644
index 000000000000..9eef679e1311
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __INTEL_GGTT_FENCING_H__
+#define __INTEL_GGTT_FENCING_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include "i915_active.h"
+
+struct drm_i915_gem_object;
+struct i915_ggtt;
+struct i915_vma;
+struct intel_gt;
+struct sg_table;
+
+#define I965_FENCE_PAGE 4096UL
+
+struct i915_fence_reg {
+ struct list_head link; /* entry in ggtt->fence_list */
+ struct i915_ggtt *ggtt; /* GGTT this fence register belongs to */
+ struct i915_vma *vma; /* presumably the vma currently fenced - TODO confirm */
+ atomic_t pin_count;
+ struct i915_active active;
+ int id; /* index into ggtt->fence_regs */
+ /**
+ * Whether the tiling parameters for the currently
+ * associated fence register have changed. Note that
+ * for the purposes of tracking tiling changes we also
+ * treat the unfenced register, the register slot that
+ * the object occupies whilst it executes a fenced
+ * command (such as BLT on gen2/3), as a "fence".
+ */
+ bool dirty;
+ u32 start;
+ u32 size;
+ u32 tiling;
+ u32 stride;
+};
+
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt);
+void i915_unreserve_fence(struct i915_fence_reg *fence);
+
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt);
+
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt);
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt);
+
+void intel_gt_init_swizzling(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 51b8718513bc..534e435f20bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,7 +138,7 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define MI_LRI_CS_MMIO (1<<19)
+#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -156,6 +156,7 @@
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -235,9 +236,8 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
-#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
-#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
+#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
@@ -292,10 +292,21 @@
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
-#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
-#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
-#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define STATE_BASE_ADDRESS \
+ ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
+#define BASE_ADDRESS_MODIFY REG_BIT(0)
+#define PIPELINE_SELECT \
+ ((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16))
+#define PIPELINE_SELECT_MEDIA REG_BIT(0)
+#define GFX_OP_3DSTATE_VF_STATISTICS \
+ ((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16))
+#define MEDIA_VFE_STATE \
+ ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16))
#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \
+ ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16))
+#define MEDIA_OBJECT \
+ ((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16))
#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index f1f1b306e0af..f069551e412f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -7,6 +7,8 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
+#include "intel_gt_buffer_pool.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
@@ -15,6 +17,7 @@
#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"
+#include "shmem_utils.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -26,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
+ intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
@@ -370,18 +374,6 @@ static struct i915_address_space *kernel_vm(struct intel_gt *gt)
return i915_vm_get(&gt->ggtt->vm);
}
-static int __intel_context_flush_retire(struct intel_context *ce)
-{
- struct intel_timeline *tl;
-
- tl = intel_context_timeline_lock(ce);
- if (IS_ERR(tl))
- return PTR_ERR(tl);
-
- intel_context_timeline_unlock(tl);
- return 0;
-}
-
static int __engines_record_defaults(struct intel_gt *gt)
{
struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -447,8 +439,7 @@ err_rq:
for (id = 0; id < ARRAY_SIZE(requests); id++) {
struct i915_request *rq;
- struct i915_vma *state;
- void *vaddr;
+ struct file *state;
rq = requests[id];
if (!rq)
@@ -460,48 +451,16 @@ err_rq:
}
GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
- state = rq->context->state;
- if (!state)
+ if (!rq->context->state)
continue;
- /* Serialise with retirement on another CPU */
- GEM_BUG_ON(!i915_request_completed(rq));
- err = __intel_context_flush_retire(rq->context);
- if (err)
- goto out;
-
- /* We want to be able to unbind the state from the GGTT */
- GEM_BUG_ON(intel_context_is_pinned(rq->context));
-
- /*
- * As we will hold a reference to the logical state, it will
- * not be torn down with the context, and importantly the
- * object will hold onto its vma (making it possible for a
- * stray GTT write to corrupt our defaults). Unmap the vma
- * from the GTT to prevent such accidents and reclaim the
- * space.
- */
- err = i915_vma_unbind(state);
- if (err)
- goto out;
-
- i915_gem_object_lock(state->obj);
- err = i915_gem_object_set_to_cpu_domain(state->obj, false);
- i915_gem_object_unlock(state->obj);
- if (err)
- goto out;
-
- i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
-
- /* Check we can acquire the image of the context state */
- vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
+ /* Keep a copy of the state's backing pages; free the obj */
+ state = shmem_create_from_object(rq->context->state->obj);
+ if (IS_ERR(state)) {
+ err = PTR_ERR(state);
goto out;
}
-
- rq->engine->default_state = i915_gem_object_get(state->obj);
- i915_gem_object_unpin_map(state->obj);
+ rq->engine->default_state = state;
}
out:
@@ -576,6 +535,8 @@ int intel_gt_init(struct intel_gt *gt)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_init_clock_frequency(gt);
+
err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
if (err)
goto out_fw;
@@ -592,7 +553,9 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_engines;
- intel_uc_init(&gt->uc);
+ err = intel_uc_init(&gt->uc);
+ if (err)
+ goto err_engines;
err = intel_gt_resume(gt);
if (err)
@@ -633,8 +596,7 @@ void intel_gt_driver_remove(struct intel_gt *gt)
{
__intel_gt_disable(gt);
- intel_uc_fini_hw(&gt->uc);
- intel_uc_fini(&gt->uc);
+ intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
}
@@ -642,6 +604,13 @@ void intel_gt_driver_remove(struct intel_gt *gt)
void intel_gt_driver_unregister(struct intel_gt *gt)
{
intel_rps_driver_unregister(&gt->rps);
+
+ /*
+ * Upon unregistering the device to prevent any new users, cancel
+ * all in-flight requests so that we can quickly unbind the active
+ * resources.
+ */
+ intel_gt_set_wedged(gt);
}
void intel_gt_driver_release(struct intel_gt *gt)
@@ -654,10 +623,14 @@ void intel_gt_driver_release(struct intel_gt *gt)
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
+ intel_gt_fini_buffer_pool(gt);
}
void intel_gt_driver_late_release(struct intel_gt *gt)
{
+ /* We need to wait for inflight RCU frees to release their grip */
+ rcu_barrier();
+
intel_uc_driver_late_release(&gt->uc);
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 397186818305..1495054a4305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2014-2018 Intel Corporation
*/
@@ -8,15 +7,15 @@
#include "i915_drv.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
+#include "intel_gt_buffer_pool.h"
-static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
{
- return container_of(pool, struct intel_engine_cs, pool);
+ return container_of(pool, struct intel_gt, buffer_pool);
}
static struct list_head *
-bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
int n;
@@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
return &pool->cache_list[n];
}
-static void node_free(struct intel_engine_pool_node *node)
+static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
kfree(node);
}
+/*
+ * Delayed-work handler that trims the buffer cache.
+ *
+ * Each size bucket is walked from its tail (oldest entry) towards its head.
+ * Nodes whose last-use timestamp is more than one second old are unlinked
+ * onto a private list; the walk of a bucket stops at the first node that is
+ * still fresh, since everything nearer the head is newer still. The unlinked
+ * nodes are freed only after the pool lock has been dropped. The work re-arms
+ * itself for as long as any bucket still holds a node.
+ */
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
+ struct intel_gt_buffer_pool_node *node, *next;
+ unsigned long old = jiffies - HZ; /* anything last used before this is stale */
+ bool active = false;
+ LIST_HEAD(stale);
+ int n;
+
+ /* Free buffers that have not been used in the past second */
+ spin_lock_irq(&pool->lock);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct list_head *list = &pool->cache_list[n];
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_entry_safe_reverse(node, next, list, link) {
+ /* Stop at the first buffer used within the last second */
+ if (!time_before(node->age, old))
+ break;
+
+ list_move(&node->link, &stale);
+ }
+ active |= !list_empty(list);
+ }
+ spin_unlock_irq(&pool->lock);
+
+ /* node_free() runs outside the spinlock */
+ list_for_each_entry_safe(node, next, &stale, link)
+ node_free(node);
+
+ if (active)
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
+}
+
static int pool_active(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
struct dma_resv *resv = node->obj->base.resv;
int err;
@@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
__i915_active_call
static void pool_retire(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
- struct intel_engine_pool *pool = node->pool;
+ struct intel_gt_buffer_pool *pool = node->pool;
struct list_head *list = bucket_for_size(pool, node->obj->base.size);
unsigned long flags;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
i915_gem_object_unpin_pages(node->obj);
/* Return this object to the shrinker pool */
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
+ node->age = jiffies;
list_add(&node->link, list);
spin_unlock_irqrestore(&pool->lock, flags);
+
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
}
-static struct intel_engine_pool_node *
-node_create(struct intel_engine_pool *pool, size_t sz)
+static struct intel_gt_buffer_pool_node *
+node_create(struct intel_gt_buffer_pool *pool, size_t sz)
{
- struct intel_engine_cs *engine = to_engine(pool);
- struct intel_engine_pool_node *node;
+ struct intel_gt *gt = to_gt(pool);
+ struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;
node = kmalloc(sizeof(*node),
@@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
node->pool = pool;
i915_active_init(&node->active, pool_active, pool_retire);
- obj = i915_gem_object_create_internal(engine->i915, sz);
+ obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj)) {
i915_active_fini(&node->active);
kfree(node);
@@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return node;
}
-static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
{
- if (intel_engine_is_virtual(engine))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- GEM_BUG_ON(!engine);
- return &engine->pool;
-}
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
-{
- struct intel_engine_pool *pool = lookup_pool(engine);
- struct intel_engine_pool_node *node;
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+ struct intel_gt_buffer_pool_node *node;
struct list_head *list;
unsigned long flags;
int ret;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
@@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
return node;
}
-void intel_engine_pool_init(struct intel_engine_pool *pool)
+void intel_gt_init_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
spin_lock_init(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
+ INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-void intel_engine_pool_park(struct intel_engine_pool *pool)
+static void pool_free_imm(struct intel_gt_buffer_pool *pool)
{
int n;
+ spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct intel_gt_buffer_pool_node *node, *next;
struct list_head *list = &pool->cache_list[n];
- struct intel_engine_pool_node *node, *nn;
- list_for_each_entry_safe(node, nn, list, link)
+ list_for_each_entry_safe(node, next, list, link)
node_free(node);
-
INIT_LIST_HEAD(list);
}
+ spin_unlock_irq(&pool->lock);
+}
+
+void intel_gt_flush_buffer_pool(struct intel_gt *gt) /* stop the trim work and empty the cache */
+{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+
+ if (cancel_delayed_work_sync(&pool->work)) /* free immediately only if the work was still queued */
+ pool_free_imm(pool);
+}
-void intel_engine_pool_fini(struct intel_engine_pool *pool)
+void intel_gt_fini_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
+ intel_gt_flush_buffer_pool(gt);
+
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
new file mode 100644
index 000000000000..42cbac003e8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_GT_BUFFER_POOL_H
+#define INTEL_GT_BUFFER_POOL_H
+
+#include <linux/types.h>
+
+#include "i915_active.h"
+#include "intel_gt_buffer_pool_types.h"
+
+struct intel_gt;
+struct i915_request;
+
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+
+static inline int
+intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&node->active, rq); /* track @rq on the node's i915_active; result passed through */
+}
+
+static inline void
+intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
+{
+ i915_active_release(&node->active); /* release the node's i915_active */
+}
+
+void intel_gt_init_buffer_pool(struct intel_gt *gt);
+void intel_gt_flush_buffer_pool(struct intel_gt *gt);
+void intel_gt_fini_buffer_pool(struct intel_gt *gt);
+
+#endif /* INTEL_GT_BUFFER_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e31ee361b76f..e28bdda771ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -4,26 +4,29 @@
* Copyright © 2014-2018 Intel Corporation
*/
-#ifndef INTEL_ENGINE_POOL_TYPES_H
-#define INTEL_ENGINE_POOL_TYPES_H
+#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
+#define INTEL_GT_BUFFER_POOL_TYPES_H
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include "i915_active_types.h"
struct drm_i915_gem_object;
-struct intel_engine_pool {
+struct intel_gt_buffer_pool {
spinlock_t lock;
struct list_head cache_list[4];
+ struct delayed_work work;
};
-struct intel_engine_pool_node {
+struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_engine_pool *pool;
+ struct intel_gt_buffer_pool *pool;
+ unsigned long age;
};
-#endif /* INTEL_ENGINE_POOL_TYPES_H */
+#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
new file mode 100644
index 000000000000..999079686846
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
+
+#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */
+#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */
+#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */
+
+static u32 read_clock_frequency(const struct intel_gt *gt) /* returns the GT clock frequency in Hz */
+{
+ if (INTEL_GEN(gt->i915) >= 11) { /* gen11+: decode the crystal clock field of RPM_CONFIG0 */
+ u32 config;
+
+ config = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+ config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
+ config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+ switch (config) {
+ case 0: return MHZ_12;
+ case 1:
+ case 2: return MHZ_19_2;
+ default: /* unrecognised values are treated like 3 */
+ case 3: return MHZ_12_5;
+ }
+ } else if (INTEL_GEN(gt->i915) >= 9) { /* gen9/10: fixed per-platform value */
+ if (IS_GEN9_LP(gt->i915))
+ return MHZ_19_2;
+ else
+ return MHZ_12;
+ } else { /* everything older */
+ return MHZ_12_5;
+ }
+}
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt)
+{
+ /*
+ * Note that on gen11+, the clock frequency may be reconfigured.
+ * We do not, and we assume nobody else does.
+ *
+ * The value read here is cached in gt->clock_frequency (in Hz)
+ * and traced in kHz.
+ */
+ gt->clock_frequency = read_clock_frequency(gt);
+ GT_TRACE(gt,
+ "Using clock frequency: %dkHz\n",
+ gt->clock_frequency / 1000);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt) /* debug check: report if the clock changed since init */
+{
+ if (gt->clock_frequency != read_clock_frequency(gt)) { /* cached value vs. a fresh read */
+ dev_err(gt->i915->drm.dev,
+ "GT clock frequency changed, was %uHz, now %uHz!\n",
+ gt->clock_frequency,
+ read_clock_frequency(gt));
+ }
+}
+#endif
+
+static u64 div_u64_roundup(u64 nom, u32 den)
+{
+ return div_u64(nom + den - 1, den); /* nom / den, rounded up */
+}
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000),
+ gt->clock_frequency); /* count * 1e9 / Hz, rounded up; the 64-bit quotient is narrowed to u32 */
+}
+
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return intel_gt_clock_interval_to_ns(gt, 16 * count); /* one PM interval unit is counted as 16 clock ticks */
+}
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns)
+{
+ return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns),
+ 1000 * 1000 * 1000); /* Hz * ns / 1e9, rounded up; the 64-bit quotient is narrowed to u32 */
+}
+
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
+{
+ u32 val;
+
+ /*
+ * The nanosecond value is converted to clock ticks and then to
+ * units of 16 ticks, rounding up at each step. Only on gen6 is the
+ * result further rounded up to a multiple of 25:
+ *
+ * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
+ * 8300) freezing up around GPU hangs. Looks as if even
+ * scheduling/timer interrupts start misbehaving if the RPS
+ * EI/thresholds are "bad", leading to a very sluggish or even
+ * frozen machine.
+ */
+ val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16);
+ if (IS_GEN(gt->i915, 6))
+ val = roundup(val, 25);
+
+ return val;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
new file mode 100644
index 000000000000..f793c89f2cbd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CLOCK_UTILS_H__
+#define __INTEL_GT_CLOCK_UTILS_H__
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt);
+#else
+static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {}
+#endif
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count);
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count);
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns);
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns);
+
+#endif /* __INTEL_GT_CLOCK_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..0cc7dd54f4f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
+ if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+ WRITE_ONCE(engine->execlists.yield,
+ ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+ ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+ engine->execlists.yield);
+ if (del_timer(&engine->execlists.timer))
+ tasklet = true;
+ }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT;
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 8b653c0f5e5f..6bdb434a442d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -12,6 +12,7 @@
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_llc.h"
@@ -138,6 +139,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_check_clock_frequency(gt);
+
/*
* As we have just resumed the machine and woken the device up from
* deep PCI sleep (presumably D3_cold), assume the HW has been reset
@@ -155,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
intel_uc_reset_prepare(&gt->uc);
+ for_each_engine(engine, gt, id)
+ if (engine->sanitize)
+ engine->sanitize(engine);
+
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, false);
@@ -164,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (engine->reset.finish)
engine->reset.finish(engine);
+ intel_rps_sanitize(&gt->rps);
+
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
@@ -191,11 +200,12 @@ int intel_gt_resume(struct intel_gt *gt)
* Only the kernel contexts should remain pinned over suspend,
* allowing us to fixup the user contexts on their first pin.
*/
+ gt_sanitize(gt, true);
+
intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
- gt_sanitize(gt, true);
if (intel_gt_is_wedged(gt)) {
err = -EIO;
goto out_fw;
@@ -204,7 +214,7 @@ int intel_gt_resume(struct intel_gt *gt)
/* Only when the HW is re-initialised, can we replay the requests */
err = intel_gt_init_hw(gt);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to initialize GPU, declaring it wedged!\n");
goto err_wedged;
}
@@ -220,7 +230,7 @@ int intel_gt_resume(struct intel_gt *gt)
intel_engine_pm_put(engine);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to restart %s (%d)\n",
engine->name, err);
goto err_wedged;
@@ -324,6 +334,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
{
GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
+ intel_ggtt_restore_fences(gt->ggtt);
return intel_uc_runtime_resume(&gt->uc);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 8a5054f21bf8..16ff47c83bd5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -26,6 +26,11 @@ static bool retire_requests(struct intel_timeline *tl)
return !i915_active_fence_isset(&tl->last_request);
}
+static bool engine_active(const struct intel_engine_cs *engine)
+{
+ return !list_empty(&engine->kernel_context->timeline->requests);
+}
+
static bool flush_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -37,8 +42,13 @@ static bool flush_submission(struct intel_gt *gt)
for_each_engine(engine, gt, id) {
intel_engine_flush_submission(engine);
- active |= flush_work(&engine->retire_work);
- active |= flush_work(&engine->wakeref.work);
+
+ /* Flush the background retirement and idle barriers */
+ flush_work(&engine->retire_work);
+ flush_delayed_work(&engine->wakeref.work);
+
+ /* Is the idle barrier still outstanding? */
+ active |= engine_active(engine);
}
return active;
@@ -147,25 +157,32 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
fence = i915_active_fence_get(&tl->last_request);
if (fence) {
+ mutex_unlock(&tl->mutex);
+
timeout = dma_fence_wait_timeout(fence,
interruptible,
timeout);
dma_fence_put(fence);
+
+ /* Retirement is best effort */
+ if (!mutex_trylock(&tl->mutex)) {
+ active_count++;
+ goto out_active;
+ }
}
}
- if (!retire_requests(tl) || flush_submission(gt))
+ if (!retire_requests(tl))
active_count++;
+ mutex_unlock(&tl->mutex);
- spin_lock(&timelines->lock);
+out_active: spin_lock(&timelines->lock);
- /* Resume iteration after dropping lock */
+ /* Resume list iteration after reacquiring spinlock */
list_safe_reset_next(tl, tn, link);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
- mutex_unlock(&tl->mutex);
-
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
GEM_BUG_ON(atomic_read(&tl->active_count));
@@ -177,6 +194,9 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
+ if (flush_submission(gt)) /* Wait, there's more! */
+ active_count++;
+
return active_count ? timeout : 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 96890dd12b5f..0cc1d6b185dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_gt_buffer_pool_types.h"
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
@@ -61,6 +62,7 @@ struct intel_gt {
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
+ ktime_t last_init_time;
struct intel_reset reset;
/**
@@ -72,14 +74,12 @@ struct intel_gt {
*/
intel_wakeref_t awake;
+ u32 clock_frequency;
+
struct intel_llc llc;
struct intel_rc6 rc6;
struct intel_rps rps;
- ktime_t last_init_time;
-
- struct i915_vma *scratch;
-
spinlock_t irq_lock;
u32 gt_imr;
u32 pm_ier;
@@ -97,6 +97,18 @@ struct intel_gt {
* Reserved for exclusive use by the kernel.
*/
struct i915_address_space *vm;
+
+ /*
+ * A pool of objects to use as shadow copies of client batch buffers
+ * when the command parser is enabled. Prevents the client from
+ * modifying the batch contents after software parsing.
+ *
+ * Buffers older than 1s are periodically reaped from the pool,
+ * or may be reclaimed by the shrinker before then.
+ */
+ struct intel_gt_buffer_pool buffer_pool;
+
+ struct i915_vma *scratch;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index bb9a6e638175..2a72cce63fd9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -171,7 +171,9 @@ void __i915_vm_close(struct i915_address_space *vm)
{
struct i915_vma *vma, *vn;
- mutex_lock(&vm->mutex);
+ if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
+ return;
+
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -186,6 +188,7 @@ void __i915_vm_close(struct i915_address_space *vm)
i915_gem_object_put(obj);
}
GEM_BUG_ON(!list_empty(&vm->bound_list));
+
mutex_unlock(&vm->mutex);
}
@@ -484,30 +487,6 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
}
-u64 gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
-
- if (unlikely(flags & PTE_READ_ONLY))
- pte &= ~_PAGE_RW;
-
- switch (level) {
- case I915_CACHE_NONE:
- pte |= PPAT_UNCACHED;
- break;
- case I915_CACHE_WT:
- pte |= PPAT_DISPLAY_ELLC;
- break;
- default:
- pte |= PPAT_CACHED;
- break;
- }
-
- return pte;
-}
-
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
/* TGL doesn't support LLC or AGE settings */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 23004445806a..d93ebdf3fa0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -26,7 +26,6 @@
#include <drm/drm_mm.h>
#include "gt/intel_reset.h"
-#include "i915_gem_fence_reg.h"
#include "i915_selftest.h"
#include "i915_vma_types.h"
@@ -135,6 +134,8 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
+struct i915_fence_reg;
+
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
@@ -333,7 +334,7 @@ struct i915_ggtt {
u32 pin_bias;
unsigned int num_fences;
- struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+ struct i915_fence_reg *fence_regs;
struct list_head fence_list;
/**
@@ -429,8 +430,7 @@ static inline void
i915_vm_close(struct i915_address_space *vm)
{
GEM_BUG_ON(!atomic_read(&vm->open));
- if (atomic_dec_and_test(&vm->open))
- __i915_vm_close(vm);
+ __i915_vm_close(vm);
i915_vm_put(vm);
}
@@ -515,10 +515,6 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
-u64 gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags);
-
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ba31cbe8c68e..87e6c5bdd2dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -147,6 +147,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
#define RING_EXECLIST1_VALID (1 << 0x3)
@@ -216,7 +217,7 @@ struct virtual_engine {
/* And finally, which physical engines this virtual engine maps onto. */
unsigned int num_siblings;
- struct intel_engine_cs *siblings[0];
+ struct intel_engine_cs *siblings[];
};
static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
@@ -238,6 +239,123 @@ __execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x74;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x68;
+ else if (engine->class == RENDER_CLASS)
+ return 0xd8;
+ else
+ return -1;
+}
+
+static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x12;
+ else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
+ return 0x18;
+ else
+ return -1;
+}
+
+static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_wa_bb_per_ctx(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_indirect_ptr(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
+{
+ if (engine->class != RENDER_CLASS)
+ return -1;
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0xb6;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return 0xaa;
+ else
+ return -1;
+}
+
+static u32
+lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
+{
+ switch (INTEL_GEN(engine->i915)) {
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ fallthrough;
+ case 12:
+ return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 11:
+ return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 10:
+ return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 9:
+ return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 8:
+ return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ }
+}
+
+static void
+lrc_ring_setup_indirect_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr,
+ u32 size)
+{
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+ GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
+ regs[lrc_ring_indirect_ptr(engine) + 1] =
+ ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+ GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
+ regs[lrc_ring_indirect_offset(engine) + 1] =
+ lrc_ring_indirect_offset_default(engine) << 6;
+}
+
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -245,7 +363,7 @@ static void mark_eio(struct i915_request *rq)
GEM_BUG_ON(i915_request_signaled(rq));
- dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
@@ -293,7 +411,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority;
+ return READ_ONCE(rq->sched.attr.priority);
}
static int effective_prio(const struct i915_request *rq)
@@ -311,18 +429,7 @@ static int effective_prio(const struct i915_request *rq)
if (i915_request_has_nopreempt(rq))
prio = I915_PRIORITY_UNPREEMPTABLE;
- /*
- * On unwinding the active request, we give it a priority bump
- * if it has completed waiting on any semaphore. If we know that
- * the request has already started, we can prevent an unwanted
- * preempt-to-idle cycle by taking that into account now.
- */
- if (__i915_request_has_started(rq))
- prio |= I915_PRIORITY_NOSEMAPHORE;
-
- /* Restrict mere WAIT boosts from triggering preemption */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
- return prio | __NO_PREEMPTION;
+ return prio;
}
static int queue_prio(const struct intel_engine_execlists *execlists)
@@ -456,10 +563,10 @@ assert_priority_queue(const struct i915_request *prev,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
-static u64
+static u32
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- u64 desc;
+ u32 desc;
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +577,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
- /*
- * The following 32bits are copied into the OA reports (dword 2).
- * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
- * anything below.
- */
- if (INTEL_GEN(engine->i915) >= 11) {
- desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
- /* bits 48-53 */
-
- desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
- /* bits 61-63 */
- }
-
- return desc;
+ return i915_ggtt_offset(ce->state) | desc;
}
static inline unsigned int dword_in_page(void *addr)
@@ -503,7 +596,7 @@ static void set_offsets(u32 *regs,
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END(x) 0, (x)
+#define END(total_state_size) 0, (total_state_size)
{
const u32 base = engine->mmio_base;
@@ -526,7 +619,7 @@ static void set_offsets(u32 *regs,
if (flags & POSTED)
*regs |= MI_LRI_FORCE_POSTED;
if (INTEL_GEN(engine->i915) >= 11)
- *regs |= MI_LRI_CS_MMIO;
+ *regs |= MI_LRI_LRM_CS_MMIO;
regs++;
GEM_BUG_ON(!count);
@@ -911,8 +1004,63 @@ static const u8 gen12_rcs_offsets[] = {
NOP(6),
LRI(1, 0),
REG(0x0c8),
+ NOP(3 + 9 + 1),
+
+ LRI(51, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+ REG(0x084),
+ NOP(1),
- END(80)
+ END(192)
};
#undef END
@@ -1004,7 +1152,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
- rq->engine = owner;
+ WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
}
@@ -1040,17 +1188,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ if (atomic_add_unless(&engine->stats.active, 1, 0))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
+ if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
+ engine->stats.start = ktime_get();
+ atomic_inc(&engine->stats.active);
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
@@ -1058,51 +1203,20 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ GEM_BUG_ON(!atomic_read(&engine->stats.active));
+
+ if (atomic_add_unless(&engine->stats.active, -1, 1))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
+ if (atomic_dec_and_test(&engine->stats.active)) {
+ engine->stats.total =
+ ktime_add(engine->stats.total,
+ ktime_sub(ktime_get(), engine->stats.start));
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
static void
execlists_check_context(const struct intel_context *ce,
const struct intel_engine_cs *engine)
@@ -1146,14 +1260,12 @@ execlists_check_context(const struct intel_context *ce,
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
+ u32 *regs;
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
+ regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, true);
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ ce->runtime.last = intel_context_get_runtime(ce);
}
static void reset_active(struct i915_request *rq,
@@ -1192,18 +1304,7 @@ static void reset_active(struct i915_request *rq,
__execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
-}
-
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
@@ -1251,18 +1352,23 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
- ce->lrc_desc |= (u64)ce->tag << 32;
+ GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+ ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc |=
- (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
- GEN11_SW_CTX_ID_SHIFT;
- BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ unsigned int tag = ffs(READ_ONCE(engine->context_tag));
+
+ GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+ clear_bit(tag - 1, &engine->context_tag);
+ ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+ BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
}
+ ce->lrc.ccid |= engine->execlists.ccid;
+
__intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1302,7 +1408,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
+ struct intel_engine_cs * const engine,
+ unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@@ -1316,10 +1423,18 @@ __execlists_schedule_out(struct i915_request *rq,
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
*/
- if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+ ccid &= GEN12_MAX_CONTEXT_HW_ID;
+ if (ccid < BITS_PER_LONG) {
+ GEM_BUG_ON(ccid == 0);
+ GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+ set_bit(ccid - 1, &engine->context_tag);
+ }
+
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1460,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
+ u32 ccid;
trace_i915_request_out(rq);
+ ccid = rq->context->lrc.ccid;
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
- __execlists_schedule_out(rq, old);
+ __execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@@ -1361,7 +1478,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
u32 tail, prev;
/*
@@ -1400,7 +1517,7 @@ static u64 execlists_update_context(struct i915_request *rq)
*/
wmb();
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1415,6 +1532,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static __maybe_unused char *
+dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
+{
+ if (!rq)
+ return "";
+
+ snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
+ prefix,
+ rq->context->lrc.ccid,
+ rq->fence.context, rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ rq_prio(rq));
+
+ return buf;
+}
+
static __maybe_unused void
trace_ports(const struct intel_engine_execlists *execlists,
const char *msg,
@@ -1422,18 +1557,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
{
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ char __maybe_unused p0[40], p1[40];
if (!ports[0])
return;
- ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
+ dump_port(p0, sizeof(p0), "", ports[0]),
+ dump_port(p1, sizeof(p1), ", ", ports[1]));
}
static inline bool
@@ -1446,8 +1577,12 @@ static __maybe_unused bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
+ struct intel_engine_cs *engine =
+ container_of(execlists, typeof(*engine), execlists);
struct i915_request * const *port, *rq;
struct intel_context *ce = NULL;
+ bool sentinel = false;
+ u32 ccid = -1;
trace_ports(execlists, msg, execlists->pending);
@@ -1456,13 +1591,14 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
return true;
if (!execlists->pending[0]) {
- GEM_TRACE_ERR("Nothing pending for promotion!\n");
+ GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
+ engine->name);
return false;
}
if (execlists->pending[execlists_num_ports(execlists)]) {
- GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
- execlists_num_ports(execlists));
+ GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
+ engine->name, execlists_num_ports(execlists));
return false;
}
@@ -1474,13 +1610,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
GEM_BUG_ON(!i915_request_is_active(rq));
if (ce == rq->context) {
- GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
}
ce = rq->context;
+ if (ccid == ce->lrc.ccid) {
+ GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
+ engine->name,
+ ccid, ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+ ccid = ce->lrc.ccid;
+
+ /*
+ * Sentinels are supposed to be lonely so they flush the
+ * current execution off the HW. Check that they are the
+ * only request in the pending submission.
+ */
+ if (sentinel) {
+ GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
+ engine->name,
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
+ sentinel = i915_request_has_sentinel(rq);
+ if (sentinel && port != execlists->pending) {
+ GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
+ engine->name,
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
@@ -1490,7 +1658,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
if (i915_active_is_idle(&ce->active) &&
!intel_context_is_barrier(ce)) {
- GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1498,7 +1667,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1506,7 +1676,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1576,6 +1747,11 @@ static bool can_merge_ctx(const struct intel_context *prev,
return true;
}
+static unsigned long i915_request_flags(const struct i915_request *rq)
+{
+ return READ_ONCE(rq->fence.flags);
+}
+
static bool can_merge_rq(const struct i915_request *prev,
const struct i915_request *next)
{
@@ -1593,7 +1769,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
(BIT(I915_FENCE_FLAG_NOPREEMPT) |
BIT(I915_FENCE_FLAG_SENTINEL))))
return false;
@@ -1601,6 +1777,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (!can_merge_ctx(prev->context, next->context))
return false;
+ GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
return true;
}
@@ -1635,31 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct intel_engine_cs *engine)
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
{
- struct intel_engine_cs *old = ve->siblings[0];
-
- /* All unattached (rq->engine == old) must already be completed */
-
- spin_lock(&old->breadcrumbs.irq_lock);
- if (!list_empty(&ve->context.signal_link)) {
- list_move_tail(&ve->context.signal_link,
- &engine->breadcrumbs.signalers);
- intel_engine_signal_breadcrumbs(engine);
- }
- spin_unlock(&old->breadcrumbs.irq_lock);
-}
-
-static struct i915_request *
-last_active(const struct intel_engine_execlists *execlists)
-{
- struct i915_request * const *last = READ_ONCE(execlists->active);
-
- while (*last && i915_request_completed(*last))
- last++;
-
- return *last;
+ /*
+ * All the outstanding signals on ve->siblings[0] must have
+ * been completed, just pending the interrupt handler. As those
+ * signals still refer to the old sibling (via rq->engine), we must
+ * transfer those to the old irq_worker to keep our locking
+ * consistent.
+ */
+ intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
#define for_each_waiter(p__, rq__) \
@@ -1668,9 +1830,9 @@ last_active(const struct intel_engine_execlists *execlists)
wait_link)
#define for_each_signaler(p__, rq__) \
- list_for_each_entry_lockless(p__, \
- &(rq__)->sched.signalers_list, \
- signal_link)
+ list_for_each_entry_rcu(p__, \
+ &(rq__)->sched.signalers_list, \
+ signal_link)
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
@@ -1693,12 +1855,16 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
/* No waiter should start before its signaler */
- GEM_BUG_ON(i915_request_started(w) &&
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+ i915_request_started(w) &&
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
@@ -1728,22 +1894,47 @@ static void defer_active(struct intel_engine_cs *engine)
}
static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
int hint;
if (!intel_engine_has_timeslices(engine))
return false;
- if (list_is_last(&rq->sched.link, &engine->active.requests))
- return false;
-
- hint = max(rq_prio(list_next_entry(rq, sched.link)),
- engine->execlists.queue_priority_hint);
+ hint = engine->execlists.queue_priority_hint;
+ if (!list_is_last(&rq->sched.link, &engine->active.requests))
+ hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
return hint >= effective_prio(rq);
}
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ /*
+ * Once bitten, forever smitten!
+ *
+ * If the active context ever busy-waited on a semaphore,
+ * it will be treated as a hog until the end of its timeslice (i.e.
+ * until it is scheduled out and replaced by a new submission,
+ * possibly even its own lite-restore). The HW only sends an interrupt
+ * on the first miss, and we do know if that semaphore has been
+ * signaled, or even if it is now stuck on another semaphore. Play
+ * safe, yield if it might be stuck -- it will be given a fresh
+ * timeslice in the near future.
+ */
+ return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
@@ -1759,15 +1950,15 @@ timeslice(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.timeslice_duration_ms);
}
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
{
- const struct i915_request *rq = *engine->execlists.active;
+ const struct intel_engine_execlists *execlists = &engine->execlists;
+ const struct i915_request *rq = *execlists->active;
if (!rq || i915_request_completed(rq))
return 0;
- if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
return 0;
return timeslice(engine);
@@ -1775,10 +1966,39 @@ active_timeslice(const struct intel_engine_cs *engine)
static void set_timeslice(struct intel_engine_cs *engine)
{
+ unsigned long duration;
+
if (!intel_engine_has_timeslices(engine))
return;
- set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
+ duration = active_timeslice(engine);
+ ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
+
+ set_timer_ms(&engine->execlists.timer, duration);
+}
+
+static void start_timeslice(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+ const int prio = queue_prio(execlists);
+ unsigned long duration;
+
+ if (!intel_engine_has_timeslices(engine))
+ return;
+
+ WRITE_ONCE(execlists->switch_priority_hint, prio);
+ if (prio == INT_MIN)
+ return;
+
+ if (timer_pending(&execlists->timer))
+ return;
+
+ duration = timeslice(engine);
+ ENGINE_TRACE(engine,
+ "start timeslicing, prio:%d, interval:%lu",
+ prio, duration);
+
+ set_timer_ms(&execlists->timer, duration);
}
static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1786,11 +2006,9 @@ static void record_preemption(struct intel_engine_execlists *execlists)
(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}
-static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- struct i915_request *rq;
-
- rq = last_active(&engine->execlists);
if (!rq)
return 0;
@@ -1801,13 +2019,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
return READ_ONCE(engine->props.preempt_timeout_ms);
}
-static void set_preempt_timeout(struct intel_engine_cs *engine)
+static void set_preempt_timeout(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
if (!intel_engine_has_preempt_reset(engine))
return;
set_timer_ms(&engine->execlists.preempt,
- active_preempt_timeout(engine));
+ active_preempt_timeout(engine, rq));
}
static inline void clear_ports(struct i915_request **ports, int count)
@@ -1820,6 +2039,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request * const *active;
struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -1874,9 +2094,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* i.e. we will retrigger preemption following the ack in case
* of trouble.
*/
- last = last_active(execlists);
- if (last) {
+ active = READ_ONCE(execlists->active);
+
+ /*
+ * In theory we can skip over completed contexts that have not
+ * yet been processed by events (as those events are in flight):
+ *
+ * while ((last = *active) && i915_request_completed(last))
+ * active++;
+ *
+ * However, the GPU cannot handle this as it will ultimately
+ * find itself trying to jump back into a context it has just
+ * completed and barf.
+ */
+
+ if ((last = *active)) {
if (need_preempt(engine, last, rb)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
"preempting last=%llx:%lld, prio=%d, hint=%d\n",
last->fence.context,
@@ -1903,13 +2141,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
- timer_expired(&engine->execlists.timer)) {
+ timeslice_expired(execlists, last)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
- "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
last->fence.seqno,
last->sched.attr.priority,
- execlists->queue_priority_hint);
+ execlists->queue_priority_hint,
+ yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1944,11 +2188,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- if (!execlists->timer.expires &&
- need_timeslice(engine, last))
- set_timer_ms(&execlists->timer,
- timeslice(engine));
-
+ start_timeslice(engine);
return;
}
}
@@ -1983,7 +2223,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this for another */
+ start_timeslice(engine);
+ return; /* leave this for another sibling */
}
ENGINE_TRACE(engine,
@@ -1995,13 +2236,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
"",
yesno(engine != ve->siblings[0]));
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
+ WRITE_ONCE(ve->request, NULL);
+ WRITE_ONCE(ve->base.execlists.queue_priority_hint,
+ INT_MIN);
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
- rq->engine = engine;
+ WRITE_ONCE(rq->engine, engine);
if (engine != ve->siblings[0]) {
u32 *regs = ve->context.lrc_reg_state;
@@ -2014,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, engine);
+ virtual_xfer_breadcrumbs(ve);
/*
* Move the bound engine to the top of the list
@@ -2121,6 +2363,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(last &&
!can_merge_ctx(last->context,
rq->context));
+ GEM_BUG_ON(last &&
+ i915_seqno_passed(last->fence.seqno,
+ rq->fence.seqno));
submit = true;
last = rq;
@@ -2159,7 +2404,7 @@ done:
* Skip if we ended up with exactly the same set of requests,
* e.g. trying to timeslice a pair of ordered contexts
*/
- if (!memcmp(execlists->active, execlists->pending,
+ if (!memcmp(active, execlists->pending,
(port - execlists->pending + 1) * sizeof(*port))) {
do
execlists_schedule_out(fetch_and_zero(port));
@@ -2169,8 +2414,9 @@ done:
}
clear_ports(port + 1, last_port - port);
+ WRITE_ONCE(execlists->yield, -1);
+ set_preempt_timeout(engine, *active);
execlists_submit_ports(engine);
- set_preempt_timeout(engine);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -2191,6 +2437,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
execlists_schedule_out(*port);
clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+ smp_wmb(); /* complete the seqlock for execlists_active() */
WRITE_ONCE(execlists->active, execlists->inflight);
}
@@ -2339,12 +2586,11 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
- GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-
ring_set_paused(engine, 0);
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
+ smp_wmb(); /* notify execlists_active() */
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", old);
@@ -2352,11 +2598,13 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_schedule_out(*old++);
/* switch pending to inflight */
- WRITE_ONCE(execlists->active,
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending)));
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending));
+ smp_wmb(); /* complete the seqlock */
+ WRITE_ONCE(execlists->active, execlists->inflight);
WRITE_ONCE(execlists->pending[0], NULL);
} else {
@@ -2369,17 +2617,21 @@ static void process_csb(struct intel_engine_cs *engine)
* We rely on the hardware being strongly
* ordered, that the breadcrumb write is
* coherent (visible from the CPU) before the
- * user interrupt and CSB is processed.
+ * user interrupt is processed. One might assume
+ * that the breadcrumb write being before the
+ * user interrupt and the CS event for the context
+ * switch would therefore be before the CS event
+ * itself...
*/
if (GEM_SHOW_DEBUG() &&
- !i915_request_completed(*execlists->active) &&
- !reset_in_progress(execlists)) {
- struct i915_request *rq __maybe_unused =
- *execlists->active;
+ !i915_request_completed(*execlists->active)) {
+ struct i915_request *rq = *execlists->active;
const u32 *regs __maybe_unused =
rq->context->lrc_reg_state;
ENGINE_TRACE(engine,
+ "context completed before request!\n");
+ ENGINE_TRACE(engine,
"ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
ENGINE_READ(engine, RING_START),
ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
@@ -2398,8 +2650,6 @@ static void process_csb(struct intel_engine_cs *engine)
regs[CTX_RING_START],
regs[CTX_RING_HEAD],
regs[CTX_RING_TAIL]);
-
- GEM_BUG_ON("context completed before request");
}
execlists_schedule_out(*execlists->active++);
@@ -2533,11 +2783,13 @@ unlock:
static bool hold_request(const struct i915_request *rq)
{
struct i915_dependency *p;
+ bool result = false;
/*
* If one of our ancestors is on hold, we must also be on hold,
* otherwise we will bypass it and execute before it.
*/
+ rcu_read_lock();
for_each_signaler(p, rq) {
const struct i915_request *s =
container_of(p->signaler, typeof(*s), sched);
@@ -2545,11 +2797,13 @@ static bool hold_request(const struct i915_request *rq)
if (s->engine != rq->engine)
continue;
- if (i915_request_on_hold(s))
- return true;
+ result = i915_request_on_hold(s);
+ if (result)
+ break;
}
+ rcu_read_unlock();
- return false;
+ return result;
}
static void __execlists_unhold(struct i915_request *rq)
@@ -2575,6 +2829,10 @@ static void __execlists_unhold(struct i915_request *rq)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ /* Propagate any change in error status */
+ if (rq->fence.error)
+ i915_request_set_error_once(w, rq->fence.error);
+
if (w->engine != rq->engine)
continue;
@@ -2681,6 +2939,45 @@ err_cap:
return NULL;
}
+static struct i915_request *
+active_context(struct intel_engine_cs *engine, u32 ccid)
+{
+ const struct intel_engine_execlists * const el = &engine->execlists;
+ struct i915_request * const *port, *rq;
+
+ /*
+ * Use the most recent result from process_csb(), but just in case
+ * we trigger an error (via interrupt) before the first CS event has
+ * been written, peek at the next submission.
+ */
+
+ for (port = el->active; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at active:%zd\n",
+ port - el->active);
+ return rq;
+ }
+ }
+
+ for (port = el->pending; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at pending:%zd\n",
+ port - el->pending);
+ return rq;
+ }
+ }
+
+ ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
+ return NULL;
+}
+
+static u32 active_ccid(struct intel_engine_cs *engine)
+{
+ return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
+}
+
static bool execlists_capture(struct intel_engine_cs *engine)
{
struct execlists_capture *cap;
@@ -2698,7 +2995,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
return true;
spin_lock_irq(&engine->active.lock);
- cap->rq = execlists_active(&engine->execlists);
+ cap->rq = active_context(engine, active_ccid(engine));
if (cap->rq) {
cap->rq = active_request(cap->rq->context->timeline, cap->rq);
cap->rq = i915_request_get_rcu(cap->rq);
@@ -2846,10 +3143,14 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
if (reset_in_progress(execlists))
return; /* defer until we restart the engine following reset */
- if (execlists->tasklet.func == execlists_submission_tasklet)
- __execlists_submission_tasklet(engine);
- else
- tasklet_hi_schedule(&execlists->tasklet);
+ /* Hopefully we clear execlists->pending[] to let us through */
+ if (READ_ONCE(execlists->pending[0]) &&
+ tasklet_trylock(&execlists->tasklet)) {
+ process_csb(engine);
+ tasklet_unlock(&execlists->tasklet);
+ }
+
+ __execlists_submission_tasklet(engine);
}
static void submit_queue(struct intel_engine_cs *engine,
@@ -2935,19 +3236,139 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
- dev_err_once(engine->i915->drm.dev,
+ drm_err_once(&engine->i915->drm,
"%s context redzone overwritten!\n",
engine->name);
}
static void execlists_context_unpin(struct intel_context *ce)
{
- check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
i915_gem_object_unpin_map(ce->state->obj);
}
+static u32 *
+gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ CTX_TIMESTAMP * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_cmd_buf_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static inline u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+ return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+ void *ptr;
+
+ GEM_BUG_ON(!ce->wa_bb_page);
+
+ ptr = ce->lrc_reg_state;
+ ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+ ptr += context_wa_bb_offset(ce);
+
+ return ptr;
+}
+
+static void
+setup_indirect_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ u32 * const start = context_indirect_bb(ce);
+ u32 *cs;
+
+ cs = emit(ce, start);
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ while ((unsigned long)cs % CACHELINE_BYTES)
+ *cs++ = MI_NOOP;
+
+ lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
+ i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce),
+ (cs - start) * sizeof(*cs));
+}
+
static void
__execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -2962,6 +3383,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
/* RPCS */
if (engine->class == RENDER_CLASS) {
@@ -2970,6 +3392,18 @@ __execlists_update_reg_state(const struct intel_context *ce,
i915_oa_init_reg_state(ce, engine);
}
+
+ if (ce->wa_bb_page) {
+ u32 *(*fn)(const struct intel_context *ce, u32 *cs);
+
+ fn = gen12_emit_indirect_ctx_xcs;
+ if (ce->engine->class == RENDER_CLASS)
+ fn = gen12_emit_indirect_ctx_rcs;
+
+ /* Mutually exclusive wrt the global indirect bb */
+ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+ setup_indirect_ctx_bb(ce, engine, fn);
+ }
}
static int
@@ -2987,8 +3421,8 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
- ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
return 0;
@@ -3016,7 +3450,7 @@ static void execlists_context_reset(struct intel_context *ce)
ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -3036,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
if (!i915_request_timeline(rq)->has_initial_breadcrumb)
return 0;
@@ -3062,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
/* Record the updated position of the request's payload */
rq->infix = intel_ring_offset(rq, cs);
+ __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+
+ return 0;
+}
+
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
return 0;
}
@@ -3086,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request)
* to cancel/unwind this request now.
*/
+ if (!i915_vm_is_4lvl(request->context->vm)) {
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+ }
+
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
@@ -3386,7 +3877,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
ret = lrc_setup_wa_ctx(engine);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to setup context WA page: %d\n", ret);
return ret;
}
@@ -3419,6 +3911,72 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
+static void reset_csb_pointers(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ const unsigned int reset_value = execlists->csb_size - 1;
+
+ ring_set_paused(engine, 0);
+
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ /*
+ * After a reset, the HW starts writing into CSB entry [0]. We
+ * therefore have to set our HEAD pointer back one entry so that
+ * the *first* entry we check is entry 0. To complicate this further,
+ * as we don't wait for the first interrupt after reset, we have to
+ * fake the HW write to point back to the last entry so that our
+ * inline comparison of our cached head position against the last HW
+ * write works even before the first interrupt.
+ */
+ execlists->csb_head = reset_value;
+ WRITE_ONCE(*execlists->csb_write, reset_value);
+ wmb(); /* Make sure this is visible to HW (paranoia?) */
+
+ invalidate_csb_entries(&execlists->csb_status[0],
+ &execlists->csb_status[reset_value]);
+
+ /* Once more for luck and our trusty paranoia */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
+}
+
+static void execlists_sanitize(struct intel_engine_cs *engine)
+{
+ /*
+ * Poison residual state on resume, in case the suspend didn't!
+ *
+ * We have to assume that across suspend/resume (or other loss
+ * of control) that the contents of our pinned buffers have been
+ * lost, replaced by garbage. Since this doesn't always happen,
+ * let's poison such state so that we more quickly spot when
+ * we falsely assume it has been preserved.
+ */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
+
+ reset_csb_pointers(engine);
+
+ /*
+ * The kernel_context HWSP is stored in the status_page. As above,
+ * that may be lost on resume/initialisation, and so we need to
+ * reset the value in the HWSP.
+ */
+ intel_timeline_reset_seqno(engine->kernel_context->timeline);
+
+ /* And scrub the dirty cachelines for the HWSP */
+ clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+}
+
static void enable_error_interrupt(struct intel_engine_cs *engine)
{
u32 status;
@@ -3429,7 +3987,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
status = ENGINE_READ(engine, RING_ESR);
if (unlikely(status)) {
- dev_err(engine->i915->drm.dev,
+ drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
__intel_gt_reset(engine->gt, engine->mask);
@@ -3485,7 +4043,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
- engine->context_tag = 0;
+ engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3493,7 +4051,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
bool unexpected = false;
if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
- DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
+ drm_dbg(&engine->i915->drm,
+ "STOP_RING still set in RING_MI_MODE\n");
unexpected = true;
}
@@ -3553,39 +4112,10 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
+ ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
-}
-
-static void reset_csb_pointers(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- const unsigned int reset_value = execlists->csb_size - 1;
-
- ring_set_paused(engine, 0);
-
- /*
- * After a reset, the HW starts writing into CSB entry [0]. We
- * therefore have to set our HEAD pointer back one entry so that
- * the *first* entry we check is entry 0. To complicate this further,
- * as we don't wait for the first interrupt after reset, we have to
- * fake the HW write to point back to the last entry so that our
- * inline comparison of our cached head position against the last HW
- * write works even before the first interrupt.
- */
- execlists->csb_head = reset_value;
- WRITE_ONCE(*execlists->csb_write, reset_value);
- wmb(); /* Make sure this is visible to HW (paranoia?) */
- /*
- * Sometimes Icelake forgets to reset its pointers on a GPU reset.
- * Bludgeon them with a mmio update to be sure.
- */
- ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
- reset_value << 8 | reset_value);
- ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
-
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
+ engine->execlists.reset_ccid = active_ccid(engine);
}
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
@@ -3628,13 +4158,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- rq = execlists_active(execlists);
+ rq = active_context(engine, engine->execlists.reset_ccid);
if (!rq)
goto unwind;
- /* We still have requests in-flight; the engine should be active */
- GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
-
ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
@@ -3644,8 +4171,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
goto out_replay;
}
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
+
rq = active_request(ce->timeline, rq);
head = intel_ring_wrap(ce->ring, rq->head);
GEM_BUG_ON(head == ce->ring->tail);
@@ -3677,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
__i915_request_reset(rq, stalled);
- if (!stalled)
- goto out_replay;
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -3688,15 +4217,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- restore_default_state(ce, engine);
-
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine, head);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -3719,7 +4245,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
static void nop_submission_tasklet(unsigned long data)
{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+
/* The driver is wedged; don't process any more events. */
+ WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
}
static void execlists_reset_cancel(struct intel_engine_cs *engine)
@@ -4053,6 +4582,42 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS)
+ return vd[engine->instance];
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+ return ve[engine->instance];
+
+ GEM_BUG_ON("unknown aux_inv_reg\n");
+
+ return INVALID_MMIO_REG;
+}
+
+static u32 *
+gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(inv_reg);
+ *cs++ = AUX_INV;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
@@ -4061,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
u32 *cs;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl */
flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -4078,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -4093,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
- cs = intel_ring_begin(request, 8);
+ cs = intel_ring_begin(request, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -4113,29 +4679,62 @@ static int gen12_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ /* hsdes: 1809175790 */
+ cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
+ }
- /*
- * Wa_1604544889:tgl
- */
- if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
- flags = 0;
- flags |= PIPE_CONTROL_CS_STALL;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+ return 0;
+}
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+static int gen12_emit_flush(struct i915_request *request, u32 mode)
+{
+ intel_engine_mask_t aux_inv = 0;
+ u32 cmd, *cs;
+
+ if (mode & EMIT_INVALIDATE)
+ aux_inv = request->engine->mask & ~BIT(BCS0);
+
+ cs = intel_ring_begin(request,
+ 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
- cs = intel_ring_begin(request, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
+ cmd = MI_FLUSH_DW + 1;
+
+ /* We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
- cs = gen8_emit_pipe_control(cs, flags,
- LRC_PPHWSP_SCRATCH_ADDR);
- intel_ring_advance(request, cs);
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_INVALIDATE_TLB;
+ if (request->engine->class == VIDEO_DECODE_CLASS)
+ cmd |= MI_INVALIDATE_BSD;
+ }
+
+ *cs++ = cmd;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ if (aux_inv) { /* hsdes: 1809175790 */
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+ for_each_engine_masked(engine, request->engine->gt,
+ aux_inv, tmp) {
+ *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+ *cs++ = AUX_INV;
}
+ *cs++ = MI_NOOP;
}
+ intel_ring_advance(request, cs);
return 0;
}
@@ -4169,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
- u32 *cs)
+gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4184,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
+ u32 addr = i915_request_active_timeline(request)->hwsp_offset;
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+}
+
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
@@ -4209,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
static u32 *
@@ -4225,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
/*
@@ -4263,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4277,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
-
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- /* Wa_1409600907:tgl */
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+ cs = gen12_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
@@ -4329,8 +4925,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ }
}
if (INTEL_GEN(engine->i915) >= 12)
@@ -4352,6 +4951,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
static void execlists_release(struct intel_engine_cs *engine)
{
+ engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
+
execlists_shutdown(engine);
intel_engine_cleanup_common(engine);
@@ -4371,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
- if (INTEL_GEN(engine->i915) >= 12)
+ if (INTEL_GEN(engine->i915) >= 12) {
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-
+ engine->emit_flush = gen12_emit_flush;
+ }
engine->set_default_submission = intel_execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
@@ -4409,6 +5011,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4453,7 +5056,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
* because we only expect rare glitches but nothing
* critical to prevent us from using GPU
*/
- DRM_ERROR("WA batch buffer initialization failed\n");
+ drm_err(&i915->drm, "WA batch buffer initialization failed\n");
if (HAS_LOGICAL_RING_ELSQ(i915)) {
execlists->submit_reg = uncore->regs +
@@ -4476,48 +5079,18 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
- reset_csb_pointers(engine);
+ if (INTEL_GEN(engine->i915) >= 11) {
+ execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+ execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+ }
/* Finally, take ownership and responsibility for cleanup! */
+ engine->sanitize = execlists_sanitize;
engine->release = execlists_release;
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
-{
- u32 indirect_ctx_offset;
-
- switch (INTEL_GEN(engine->i915)) {
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- /* fall through */
- case 12:
- indirect_ctx_offset =
- GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 11:
- indirect_ctx_offset =
- GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 10:
- indirect_ctx_offset =
- GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 9:
- indirect_ctx_offset =
- GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 8:
- indirect_ctx_offset =
- GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- }
-
- return indirect_ctx_offset;
-}
-
-
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
@@ -4535,30 +5108,27 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
}
static void init_wa_bb_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine,
- u32 pos_bb_per_ctx)
+ const struct intel_engine_cs *engine)
{
const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
if (wa_ctx->per_ctx.size) {
const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- regs[pos_bb_per_ctx] =
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
if (wa_ctx->indirect_ctx.size) {
- const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[pos_bb_per_ctx + 2] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[pos_bb_per_ctx + 4] =
- intel_lr_indirect_ctx_offset(engine) << 6;
+ lrc_ring_setup_indirect_ctx(regs, engine,
+ i915_ggtt_offset(wa_ctx->vma) +
+ wa_ctx->indirect_ctx.offset,
+ wa_ctx->indirect_ctx.size);
}
}
@@ -4607,10 +5177,7 @@ static void execlists_init_reg_state(u32 *regs,
init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
- init_wa_bb_reg_state(regs, engine,
- INTEL_GEN(engine->i915) >= 12 ?
- GEN12_CTX_BB_PER_CTX_PTR :
- CTX_BB_PER_CTX_PTR);
+ init_wa_bb_reg_state(regs, engine);
__reset_stop_ring(regs, engine);
}
@@ -4623,29 +5190,18 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
- return ret;
+ drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
+ return PTR_ERR(vaddr);
}
set_redzone(vaddr, engine);
if (engine->default_state) {
- void *defaults;
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- ret = PTR_ERR(defaults);
- goto err_unpin_ctx;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
__set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -4657,14 +5213,12 @@ populate_lr_context(struct intel_context *ce,
* The second page of the context object contains some registers which
* must be set up prior to the first execution.
*/
- execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
ce, engine, ring, inhibit);
- ret = 0;
-err_unpin_ctx:
__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
- return ret;
+ return 0;
}
static int __execlists_context_alloc(struct intel_context *ce,
@@ -4682,6 +5236,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
+ if (INTEL_GEN(engine->i915) == 12) {
+ ce->wa_bb_page = context_size / PAGE_SIZE;
+ context_size += PAGE_SIZE;
+ }
+
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
@@ -4721,7 +5280,8 @@ static int __execlists_context_alloc(struct intel_context *ce,
ret = populate_lr_context(ce, ctx_obj, engine, ring);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to populate LRC: %d\n", ret);
goto error_ring_free;
}
@@ -4774,6 +5334,8 @@ static void virtual_context_destroy(struct kref *kref)
__execlists_context_fini(&ve->context);
intel_context_fini(&ve->context);
+ intel_engine_free_request_pool(&ve->base);
+
kfree(ve->bonds);
kfree(ve);
}
@@ -4873,7 +5435,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = rq->execution_mask;
if (unlikely(!mask)) {
/* Invalid selection, submit to a random engine in error */
- i915_request_skip(rq, -ENODEV);
+ i915_request_set_error_once(rq, -ENODEV);
mask = ve->siblings[0]->mask;
}
@@ -4887,7 +5449,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
static void virtual_submission_tasklet(unsigned long data)
{
struct virtual_engine * const ve = (struct virtual_engine *)data;
- const int prio = ve->base.execlists.queue_priority_hint;
+ const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
intel_engine_mask_t mask;
unsigned int n;
@@ -4898,12 +5460,15 @@ static void virtual_submission_tasklet(unsigned long data)
return;
local_irq_disable();
- for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
- struct intel_engine_cs *sibling = ve->siblings[n];
+ for (n = 0; n < ve->num_siblings; n++) {
+ struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
struct rb_node **parent, *rb;
bool first;
+ if (!READ_ONCE(ve->request))
+ break; /* already handled by a sibling's tasklet */
+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
spin_lock(&sibling->active.lock);
@@ -4954,10 +5519,8 @@ static void virtual_submission_tasklet(unsigned long data)
submit_engine:
GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
node->prio = prio;
- if (first && prio > sibling->execlists.queue_priority_hint) {
- sibling->execlists.queue_priority_hint = prio;
+ if (first && prio > sibling->execlists.queue_priority_hint)
tasklet_hi_schedule(&sibling->execlists.tasklet);
- }
spin_unlock(&sibling->active.lock);
}
@@ -5283,11 +5846,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
show_request(m, last, "\t\tE ");
}
- last = NULL;
- count = 0;
+ if (execlists->switch_priority_hint != INT_MIN)
+ drm_printf(m, "\t\tSwitch priority hint: %d\n",
+ READ_ONCE(execlists->switch_priority_hint));
if (execlists->queue_priority_hint != INT_MIN)
drm_printf(m, "\t\tQueue priority hint: %d\n",
- execlists->queue_priority_hint);
+ READ_ONCE(execlists->queue_priority_hint));
+
+ last = NULL;
+ count = 0;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
int i;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index dfbc214e14f5..91fd8e452d9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -90,6 +90,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine);
#define LRC_PPHWSP_SZ (1)
/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
/* Space within PPHWSP reserved to be used as scratch */
#define LRC_PPHWSP_SCRATCH 0x34
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index d39b72590e40..93cb6c460508 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,14 +9,13 @@
#include <linux/types.h>
-/* GEN8 to GEN11 Reg State Context */
+/* GEN8 to GEN12 Reg State Context */
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
-#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
@@ -30,9 +29,6 @@
#define GEN9_CTX_RING_MI_MODE 0x54
-/* GEN12+ Reg State Context */
-#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
-
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index bef132709854..ab675d35030d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -7,6 +7,7 @@
#include <linux/pm_runtime.h>
#include "i915_drv.h"
+#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
@@ -112,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
struct intel_uncore *uncore = rc6_to_uncore(rc6);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 rc6_mode;
/* 2b: Program RC6 thresholds.*/
if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
@@ -164,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 3a: Enable RC6 */
set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
- /* WaRsUseTimeoutMode:cnl (pre-prod) */
- if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
- rc6_mode = GEN7_RC_CTL_TO_MODE;
- else
- rc6_mode = GEN6_RC_CTL_EI_MODE(1);
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode;
+ GEN6_RC_CTL_EI_MODE(1);
/*
* WaRsDisableCoarsePowerGating:skl,cnl
@@ -245,16 +240,18 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
&rc6vids, NULL);
if (IS_GEN(i915, 6) && ret) {
- DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
} else if (IS_GEN(i915, 6) &&
(GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
- DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
- GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ drm_dbg(&i915->drm,
+ "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
rc6vids &= 0xffff00;
rc6vids |= GEN6_ENCODE_RC6_VID(450);
ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
if (ret)
- DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ drm_err(&i915->drm,
+ "Couldn't fix incorrect rc6 voltage\n");
}
}
@@ -262,14 +259,15 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
static int chv_rc6_init(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
resource_size_t pctx_paddr, paddr;
resource_size_t pctx_size = 32 * SZ_1K;
u32 pcbr;
pcbr = intel_uncore_read(uncore, VLV_PCBR);
if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
- paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
+ paddr = i915->dsm.end + 1 - pctx_size;
GEM_BUG_ON(paddr > U32_MAX);
pctx_paddr = (paddr & ~4095);
@@ -303,7 +301,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
goto out;
}
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
/*
* From the Gunit register HAS:
@@ -315,14 +313,15 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
*/
pctx = i915_gem_object_create_stolen(i915, pctx_size);
if (IS_ERR(pctx)) {
- DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ drm_dbg(&i915->drm,
+ "not enough stolen space for PCTX, disabling\n");
return PTR_ERR(pctx);
}
- GEM_BUG_ON(range_overflows_t(u64,
- i915->dsm.start,
- pctx->stolen->start,
- U32_MAX));
+ GEM_BUG_ON(range_overflows_end_t(u64,
+ i915->dsm.start,
+ pctx->stolen->start,
+ U32_MAX));
pctx_paddr = i915->dsm.start + pctx->stolen->start;
intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
@@ -397,14 +396,14 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
rc_sw_target &= RC_SW_TARGET_STATE_MASK;
rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
- DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ drm_dbg(&i915->drm, "BIOS enabled RC states: "
"HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
rc_sw_target);
if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
- DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
enable_rc6 = false;
}
@@ -416,7 +415,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
- DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
enable_rc6 = false;
}
@@ -424,24 +423,25 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
- DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ drm_dbg(&i915->drm,
+ "Engine Idle wait time not set properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
- DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
- DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
- DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ drm_dbg(&i915->drm, "GPM control not setup properly.\n");
enable_rc6 = false;
}
@@ -462,7 +462,7 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 and powersaving disabled by BIOS\n");
return false;
}
@@ -494,7 +494,7 @@ static bool pctx_corrupted(struct intel_rc6 *rc6)
if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
return false;
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 context corruption, disabling runtime power management\n");
return true;
}
@@ -602,6 +602,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
void intel_rc6_park(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ unsigned int target;
if (!rc6->enabled)
return;
@@ -616,7 +617,14 @@ void intel_rc6_park(struct intel_rc6 *rc6)
/* Turn off the HW timers and go directly to rc6 */
set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
- set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+
+ if (HAS_RC6pp(rc6_to_i915(rc6)))
+ target = 0x6; /* deepest rc6 */
+ else if (HAS_RC6p(rc6_to_i915(rc6)))
+ target = 0x5; /* deep rc6 */
+ else
+ target = 0x4; /* normal rc6 */
+ set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}
void intel_rc6_disable(struct intel_rc6 *rc6)
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 5954ecc3207f..f59e7875cc5e 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -102,7 +102,7 @@ static int render_state_setup(struct intel_renderstate *so,
}
if (rodata->reloc[reloc_index] != -1) {
- DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index);
goto err;
}
@@ -194,7 +194,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_vma;
+ goto err_obj;
err = render_state_setup(so, engine->i915);
if (err)
@@ -204,8 +204,6 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
-err_vma:
- i915_vma_close(so->vma);
err_obj:
i915_gem_object_put(obj);
so->vma = NULL;
@@ -221,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+ if (err)
+ return err;
+
err = engine->emit_bb_start(rq,
so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
@@ -235,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return err;
}
- i915_vma_lock(so->vma);
- err = i915_request_await_object(rq, so->vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
-
- return err;
+ return 0;
}
void intel_renderstate_fini(struct intel_renderstate *so)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index aef6ab58d7d9..39070b514e65 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq)
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->context == hung_ctx)
- i915_request_skip(rq, -EIO);
+ if (rq->context == hung_ctx) {
+ i915_request_set_error_once(rq, -EIO);
+ __i915_request_skip(rq);
+ }
}
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -86,19 +88,18 @@ static bool mark_guilty(struct i915_request *rq)
bool banned;
int i;
+ if (intel_context_is_closed(rq->context)) {
+ intel_context_set_banned(rq->context);
+ return true;
+ }
+
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
if (!ctx)
- return false;
-
- if (i915_gem_context_is_closed(ctx)) {
- intel_context_set_banned(rq->context);
- banned = true;
- goto out;
- }
+ return intel_context_is_banned(rq->context);
atomic_inc(&ctx->guilty_count);
@@ -108,7 +109,7 @@ static bool mark_guilty(struct i915_request *rq)
goto out;
}
- dev_notice(ctx->i915->drm.dev,
+ drm_notice(&ctx->i915->drm,
"%s context reset due to GPU hang\n",
ctx->name);
@@ -154,11 +155,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rcu_read_lock(); /* protect the GEM context */
if (guilty) {
- i915_request_skip(rq, -EIO);
+ i915_request_set_error_once(rq, -EIO);
+ __i915_request_skip(rq);
if (mark_guilty(rq))
engine_skip_context(rq);
} else {
- dma_fence_set_error(&rq->fence, -EAGAIN);
+ i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq);
}
rcu_read_unlock();
@@ -753,7 +755,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(gt->ggtt);
+ intel_ggtt_restore_fences(gt->ggtt);
return err;
}
@@ -785,7 +787,7 @@ static void nop_submit_request(struct i915_request *request)
unsigned long flags;
RQ_TRACE(request, "-EIO\n");
- dma_fence_set_error(&request->fence, -EIO);
+ i915_request_set_error_once(request, -EIO);
spin_lock_irqsave(&engine->active.lock, flags);
__i915_request_submit(request);
@@ -1029,7 +1031,7 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- dev_notice(gt->i915->drm.dev,
+ drm_notice(&gt->i915->drm,
"Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
@@ -1037,7 +1039,7 @@ void intel_gt_reset(struct intel_gt *gt,
if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
- dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
+ drm_err(&gt->i915->drm, "GPU reset not supported\n");
else
drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
goto error;
@@ -1047,7 +1049,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_runtime_pm_disable_interrupts(gt->i915);
if (do_reset(gt, stalled_mask)) {
- dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
+ drm_err(&gt->i915->drm, "Failed to reset chip\n");
goto taint;
}
@@ -1109,7 +1111,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
/**
* intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
+ * @msg: reason for GPU reset; or NULL for no drm_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
@@ -1134,7 +1136,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
reset_prepare_engine(engine);
if (msg)
- dev_notice(engine->i915->drm.dev,
+ drm_notice(&engine->i915->drm,
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
@@ -1379,7 +1381,7 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- dev_err(w->gt->i915->drm.dev,
+ drm_err(&w->gt->i915->drm,
"%s timed out, cancelling all in-flight rendering.\n",
w->name);
intel_gt_set_wedged(w->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 5bdce24994aa..cc0ebca65167 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -88,6 +88,8 @@ static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
+ unsigned int head = READ_ONCE(ring->head);
+
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
/*
@@ -105,8 +107,7 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
* into the same cacheline as ring->head.
*/
#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
+ GEM_BUG_ON(cacheline(tail) == cacheline(head) && tail < head);
#undef cacheline
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index f70b903a98bc..ca7286e58409 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -29,11 +29,10 @@
#include <linux/log2.h>
-#include <drm/i915_drm.h>
-
#include "gem/i915_gem_context.h"
#include "gen6_ppgtt.h"
+#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_context.h"
@@ -43,6 +42,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -578,8 +578,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
RING_INSTPM(engine->mmio_base),
INSTPM_SYNC_FLUSH, 0,
1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+ engine->name);
}
static void ring_setup_status_page(struct intel_engine_cs *engine)
@@ -602,8 +603,9 @@ static bool stop_ring(struct intel_engine_cs *engine)
MODE_IDLE,
MODE_IDLE,
1000)) {
- DRM_ERROR("%s : timed out trying to stop ring\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s : timed out trying to stop ring\n",
+ engine->name);
/*
* Sometimes we observe that the idle flag is not
@@ -662,22 +664,23 @@ static int xcs_resume(struct intel_engine_cs *engine)
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (!stop_ring(engine)) {
/* G45 ring initialization often fails to reset head to zero */
- DRM_DEBUG_DRIVER("%s head not reset to zero "
+ drm_dbg(&dev_priv->drm, "%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_HEAD),
+ ENGINE_READ(engine, RING_TAIL),
+ ENGINE_READ(engine, RING_START));
+
+ if (!stop_ring(engine)) {
+ drm_err(&dev_priv->drm,
+ "failed to set %s head to zero "
"ctl %08x head %08x tail %08x start %08x\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
ENGINE_READ(engine, RING_HEAD),
ENGINE_READ(engine, RING_TAIL),
ENGINE_READ(engine, RING_START));
-
- if (!stop_ring(engine)) {
- DRM_ERROR("failed to set %s head to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_HEAD),
- ENGINE_READ(engine, RING_TAIL),
- ENGINE_READ(engine, RING_START));
ret = -EIO;
goto out;
}
@@ -720,7 +723,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
50)) {
- DRM_ERROR("%s initialization failed "
+ drm_err(&dev_priv->drm, "%s initialization failed "
"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
@@ -897,9 +900,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(request))
- dma_fence_set_error(&request->fence, -EIO);
-
+ i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
@@ -1241,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
if (engine->default_state) {
- void *defaults, *vaddr;
+ void *vaddr;
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
@@ -1249,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
goto err_obj;
}
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- err = PTR_ERR(defaults);
- goto err_map;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -1271,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine)
return vma;
-err_map:
- i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
return ERR_PTR(err);
@@ -1360,7 +1352,9 @@ static int load_pd_dir(struct i915_request *rq,
return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
+static inline int mi_set_context(struct i915_request *rq,
+ struct intel_context *ce,
+ u32 flags)
{
struct drm_i915_private *i915 = rq->i915;
struct intel_engine_cs *engine = rq->engine;
@@ -1435,7 +1429,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->context->state) | flags;
+ *cs++ = i915_ggtt_offset(ce->state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -1550,13 +1544,56 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}
+/*
+ * Emit, into @rq, the preamble that runs the engine's wa_ctx batch from the
+ * kernel context. switch_context() calls this before switching to a context
+ * that differs from the one recorded in engine->wa_ctx.vma->private.
+ *
+ * Steps, in order: switch_mm() to the kernel context's vm, mi_set_context()
+ * to the kernel context with MI_RESTORE_INHIBIT (only if it has a state
+ * object), engine->emit_bb_start() at wa_ctx.vma->node.start, then an
+ * EMIT_FLUSH followed by an EMIT_INVALIDATE.
+ *
+ * Returns 0 on success, or the first non-zero value returned by a step.
+ */
+static int clear_residuals(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ int ret;
+
+ ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
+ if (ret)
+ return ret;
+
+ if (engine->kernel_context->state) {
+ ret = mi_set_context(rq,
+ engine->kernel_context,
+ MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
+ if (ret)
+ return ret;
+ }
+
+ /* wa_ctx.vma is dereferenced unchecked here; the caller tests it first. */
+ ret = engine->emit_bb_start(rq,
+ engine->wa_ctx.vma->node.start, 0,
+ 0);
+ if (ret)
+ return ret;
+
+ ret = engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ return ret;
+
+ /* Always invalidate before the next switch_mm() */
+ return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
static int switch_context(struct i915_request *rq)
{
+ struct intel_engine_cs *engine = rq->engine;
struct intel_context *ce = rq->context;
+ void **residuals = NULL;
int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+ if (engine->wa_ctx.vma && ce != engine->kernel_context) {
+ if (engine->wa_ctx.vma->private != ce) {
+ ret = clear_residuals(rq);
+ if (ret)
+ return ret;
+
+ residuals = &engine->wa_ctx.vma->private;
+ }
+ }
+
ret = switch_mm(rq, vm_alias(ce->vm));
if (ret)
return ret;
@@ -1564,7 +1601,7 @@ static int switch_context(struct i915_request *rq)
if (ce->state) {
u32 flags;
- GEM_BUG_ON(rq->engine->id != RCS0);
+ GEM_BUG_ON(engine->id != RCS0);
/* For resource streamer on HSW+ and power context elsewhere */
BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
@@ -1576,7 +1613,7 @@ static int switch_context(struct i915_request *rq)
else
flags |= MI_RESTORE_INHIBIT;
- ret = mi_set_context(rq, flags);
+ ret = mi_set_context(rq, ce, flags);
if (ret)
return ret;
}
@@ -1585,6 +1622,20 @@ static int switch_context(struct i915_request *rq)
if (ret)
return ret;
+ /*
+ * Now past the point of no return, this request _will_ be emitted.
+ *
+ * Or at least this preamble will be emitted, the request may be
+ * interrupted prior to submitting the user payload. If so, we
+ * still submit the "empty" request in order to preserve global
+ * state tracking such as this, our tracking of the current
+ * dirty context.
+ */
+ if (residuals) {
+ intel_context_put(*residuals);
+ *residuals = intel_context_get(ce);
+ }
+
return 0;
}
@@ -1769,6 +1820,11 @@ static void ring_release(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
+ if (engine->wa_ctx.vma) {
+ intel_context_put(engine->wa_ctx.vma->private);
+ i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+ }
+
intel_ring_unpin(engine->legacy.ring);
intel_ring_put(engine->legacy.ring);
@@ -1916,6 +1972,64 @@ static void setup_vecs(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
}
+/*
+ * Thin wrapper that forwards to gen7_setup_clear_gpr_bb().
+ *
+ * As used by gen7_ctx_switch_bb_init(): with a NULL @vma the return value is
+ * treated as the required size (<= 0 meaning nothing to set up or an error);
+ * with a real @vma any non-zero return is treated as an error.
+ */
+static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
+ struct i915_vma * const vma)
+{
+ return gen7_setup_clear_gpr_bb(engine, vma);
+}
+
+/*
+ * Create, pin and fill the context-switch batch for @engine and publish it
+ * as engine->wa_ctx.vma.
+ *
+ * The size is probed first, rounded up to a whole number of pages and backed
+ * by an internal GEM object bound into engine->gt->vm. vma->private is
+ * initialised with a freshly created context so that it always holds a
+ * context reference.
+ *
+ * Returns 0 on success (also when the probe reports size 0, in which case
+ * engine->wa_ctx.vma is left untouched), or a negative error code. On error
+ * everything acquired so far is released.
+ */
+static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int size;
+ int err;
+
+ size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
+ /* Zero (nothing to do) and negative (error) are both passed straight back. */
+ if (size <= 0)
+ return size;
+
+ size = ALIGN(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(engine->i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vma->private = intel_context_create(engine); /* dummy residuals */
+ if (IS_ERR(vma->private)) {
+ err = PTR_ERR(vma->private);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err)
+ goto err_private;
+
+ err = i915_vma_sync(vma);
+ if (err)
+ goto err_unpin;
+
+ /* Second call, now with a real vma: write the batch contents. */
+ err = gen7_ctx_switch_bb_setup(engine, vma);
+ if (err)
+ goto err_unpin;
+
+ engine->wa_ctx.vma = vma;
+ return 0;
+
+ /* Unwind in reverse order of acquisition. */
+err_unpin:
+ i915_vma_unpin(vma);
+err_private:
+ intel_context_put(vma->private);
+err_obj:
+ i915_gem_object_put(obj);
+ return err;
+}
+
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
struct intel_timeline *timeline;
@@ -1969,11 +2083,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+ if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) {
+ err = gen7_ctx_switch_bb_init(engine);
+ if (err)
+ goto err_ring_unpin;
+ }
+
/* Finally, take ownership and responsibility for cleanup! */
engine->release = ring_release;
return 0;
+err_ring_unpin:
+ intel_ring_unpin(ring);
err_ring:
intel_ring_put(ring);
err_timeline_unpin:
@@ -1984,3 +2106,7 @@ err:
intel_engine_cleanup_common(engine);
return err;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_ring_submission.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 30ae29b30f11..2f59fc6df3c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -4,14 +4,19 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/i915_drm.h>
+
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
+#define BUSY_MAX_EI 20u /* ms */
+
/*
* Lock protecting IPS related data structures
*/
@@ -42,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
intel_uncore_write_fw(uncore, reg, val);
}
+/*
+ * Timer callback for rps->timer (registered with timer_setup() in
+ * intel_rps_init_early()).
+ *
+ * Each tick samples every engine's busy time, takes the delta since the
+ * previous tick (kept in engine->stats.rps) and retains the three largest
+ * deltas in max_busy[], largest first. While RPS is active these are
+ * combined into one busy figure and compared, as a percentage of the
+ * wall-clock time since the last tick, with rps->power.up_threshold and
+ * rps->power.down_threshold. Crossing a threshold (with room left to move
+ * within the soft limits) records the matching GEN6_PM_RP_* event in
+ * rps->pm_iir and schedules rps->work.
+ *
+ * The timer re-arms itself only while RPS is active. rps->pm_interval is in
+ * milliseconds; it is reset to 1 when an event is raised and is doubled
+ * after each re-arm, capped at BUSY_MAX_EI.
+ */
+static void rps_timer(struct timer_list *t)
+{
+ struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ s64 max_busy[3] = {};
+ ktime_t dt, last;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ s64 busy;
+ int i;
+
+ dt = intel_engine_get_busy_time(engine);
+ last = engine->stats.rps;
+ engine->stats.rps = dt;
+
+ busy = ktime_to_ns(ktime_sub(dt, last));
+ /*
+ * Insert into the descending top-3 list: a larger value takes the
+ * slot and the displaced value carries on down the array.
+ */
+ for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+ if (busy > max_busy[i])
+ swap(busy, max_busy[i]);
+ }
+ }
+
+ /* The timestamp is refreshed on every tick, whether or not RPS is active. */
+ dt = ktime_get();
+ last = rps->pm_timestamp;
+ rps->pm_timestamp = dt;
+
+ if (intel_rps_is_active(rps)) {
+ s64 busy;
+ int i;
+
+ dt = ktime_sub(dt, last);
+
+ /*
+ * Our goal is to evaluate each engine independently, so we run
+ * at the lowest clocks required to sustain the heaviest
+ * workload. However, a task may be split into sequential
+ * dependent operations across a set of engines, such that
+ * the independent contributions do not account for high load,
+ * but overall the task is GPU bound. For example, consider
+ * video decode on vcs followed by colour post-processing
+ * on vecs, followed by general post-processing on rcs.
+ * Since multi-engines being active does imply a single
+ * continuous workload across all engines, we hedge our
+ * bets by only contributing a factor of the distributed
+ * load into our busyness calculation.
+ */
+ /* busy = max_busy[0] + max_busy[1] / 2 + max_busy[2] / 4, stopping at the first zero. */
+ busy = max_busy[0];
+ for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+ if (!max_busy[i])
+ break;
+
+ busy += div_u64(max_busy[i], 1 << i);
+ }
+ GT_TRACE(rps_to_gt(rps),
+ "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+ busy, (int)div64_u64(100 * busy, dt),
+ max_busy[0], max_busy[1], max_busy[2],
+ rps->pm_interval);
+
+ /* 100 * busy / dt is the busy percentage; compared without dividing. */
+ if (100 * busy > rps->power.up_threshold * dt &&
+ rps->cur_freq < rps->max_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else if (100 * busy < rps->power.down_threshold * dt &&
+ rps->cur_freq > rps->min_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else {
+ rps->last_adj = 0;
+ }
+
+ /* Re-arm with the current interval, then back off for the next tick. */
+ mod_timer(&rps->timer,
+ jiffies + msecs_to_jiffies(rps->pm_interval));
+ rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+ }
+}
+
+/*
+ * Arm rps->timer to fire on the next jiffy and reset the evaluation interval
+ * to 1.
+ *
+ * rps->pm_timestamp is replaced by (now - pm_timestamp). rps_stop_timer()
+ * applies the same subtraction, so after a stop/start pair the stored value
+ * is the pre-stop timestamp advanced by the time spent stopped.
+ */
+static void rps_start_timer(struct intel_rps *rps)
+{
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ rps->pm_interval = 1;
+ mod_timer(&rps->timer, jiffies + 1);
+}
+
+/*
+ * Stop rps->timer, waiting for a running callback to finish, and then cancel
+ * rps->work.
+ *
+ * rps->pm_timestamp is replaced by (now - pm_timestamp), i.e. the time
+ * elapsed since it was last set; rps_start_timer() applies the same
+ * subtraction when the timer is restarted.
+ */
+static void rps_stop_timer(struct intel_rps *rps)
+{
+ del_timer_sync(&rps->timer);
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ /* rps_timer() can schedule rps->work, so cancel it only after the timer is stopped. */
+ cancel_work_sync(&rps->work);
+}
+
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
u32 mask = 0;
@@ -69,21 +168,17 @@ static void rps_enable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- rps_reset_ei(rps);
+ GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
+ rps->pm_events, rps_pm_mask(rps, rps->last_freq));
- if (IS_VALLEYVIEW(gt->i915))
- /* WaGsvRC0ResidencyMethod:vlv */
- rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
- else
- rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
+ rps_reset_ei(rps);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
spin_unlock_irq(&gt->irq_lock);
- set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
+ intel_uncore_write(gt->uncore,
+ GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
}
static void gen6_rps_reset_interrupts(struct intel_rps *rps)
@@ -115,9 +210,8 @@ static void rps_disable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- rps->pm_events = 0;
-
- set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
+ intel_uncore_write(gt->uncore,
+ GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
@@ -134,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
cancel_work_sync(&rps->work);
rps_reset_interrupts(rps);
+ GT_TRACE(gt, "interrupts:off\n");
}
static const struct cparams {
@@ -180,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps)
fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
MEMMODE_FSTART_SHIFT;
- DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
- fmax, fmin, fstart);
+ drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
rps->min_freq = fmax;
+ rps->efficient_freq = fstart;
rps->max_freq = fmin;
-
- rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
}
static unsigned long
@@ -450,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps)
if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
MEMCTL_CMD_STS) == 0, 10))
- DRM_ERROR("stuck trying to change perf mode\n");
+ drm_err(&uncore->i915->drm,
+ "stuck trying to change perf mode\n");
mdelay(1);
gen5_rps_set(rps, rps->cur_freq);
@@ -527,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val)
static void rps_set_power(struct intel_rps *rps, int new_power)
{
- struct intel_uncore *uncore = rps_to_uncore(rps);
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
@@ -537,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
+ threshold_up = 95;
+ threshold_down = 85;
+
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
- /* Upclock if more than 95% busy over 16ms */
ei_up = 16000;
- threshold_up = 95;
-
- /* Downclock if less than 85% busy over 32ms */
ei_down = 32000;
- threshold_down = 85;
break;
case BETWEEN:
- /* Upclock if more than 90% busy over 13ms */
ei_up = 13000;
- threshold_up = 90;
-
- /* Downclock if less than 75% busy over 32ms */
ei_down = 32000;
- threshold_down = 75;
break;
case HIGH_POWER:
- /* Upclock if more than 85% busy over 10ms */
ei_up = 10000;
- threshold_up = 85;
-
- /* Downclock if less than 60% busy over 32ms */
ei_down = 32000;
- threshold_down = 60;
break;
}
/* When byt can survive without system hang with dynamic
* sw freq adjustments, this restriction can be lifted.
*/
- if (IS_VALLEYVIEW(i915))
+ if (IS_VALLEYVIEW(gt->i915))
goto skip_hw_write;
- set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ GT_TRACE(gt,
+ "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
+ new_power, threshold_up, ei_up, threshold_down, ei_down);
+
+ set(uncore, GEN6_RP_UP_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
- set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
set(uncore, GEN6_RP_CONTROL,
- (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
@@ -640,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
+ GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+
mutex_lock(&rps->power.mutex);
if (interactive) {
- if (!rps->power.interactive++ && rps->active)
+ if (!rps->power.interactive++ && intel_rps_is_active(rps))
rps_set_power(rps, HIGH_POWER);
} else {
GEM_BUG_ON(!rps->power.interactive);
@@ -667,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
GEN6_AGGRESSIVE_TURBO);
set(uncore, GEN6_RPNSWREQ, swreq);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
+ val, intel_gpu_freq(rps, val), swreq);
+
return 0;
}
@@ -679,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
vlv_punit_put(i915);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
+ val, intel_gpu_freq(rps, val));
+
return err;
}
@@ -709,25 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update)
void intel_rps_unpark(struct intel_rps *rps)
{
- u8 freq;
-
- if (!rps->enabled)
+ if (!intel_rps_is_enabled(rps))
return;
+ GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
+
/*
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
mutex_lock(&rps->lock);
- rps->active = true;
- freq = max(rps->cur_freq, rps->efficient_freq),
- freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
- intel_rps_set(rps, freq);
- rps->last_adj = 0;
+
+ intel_rps_set_active(rps);
+ intel_rps_set(rps,
+ clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
+
mutex_unlock(&rps->lock);
- if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps->pm_iir = 0;
+ if (intel_rps_has_interrupts(rps))
rps_enable_interrupts(rps);
+ if (intel_rps_uses_timer(rps))
+ rps_start_timer(rps);
if (IS_GEN(rps_to_i915(rps), 5))
gen5_rps_update(rps);
@@ -735,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps)
void intel_rps_park(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ int adj;
- if (!rps->enabled)
+ if (!intel_rps_clear_active(rps))
return;
- if (INTEL_GEN(i915) >= 6)
+ if (intel_rps_uses_timer(rps))
+ rps_stop_timer(rps);
+ if (intel_rps_has_interrupts(rps))
rps_disable_interrupts(rps);
- rps->active = false;
if (rps->last_freq <= rps->idle_freq)
return;
@@ -763,14 +865,34 @@ void intel_rps_park(struct intel_rps *rps)
intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
rps_set(rps, rps->idle_freq, false);
intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+
+ /*
+ * Since we will try and restart from the previously requested
+ * frequency on unparking, treat this idle point as a downclock
+ * interrupt and reduce the frequency for resume. If we park/unpark
+ * more frequently than the rps worker can run, we will not respond
+ * to any EI and never see a change in frequency.
+ *
+ * (Note we accommodate Cherryview's limitation of only using an
+ * even bin by applying it to all.)
+ */
+ adj = rps->last_adj;
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = -2;
+ rps->last_adj = adj;
+ rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+
+ GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
void intel_rps_boost(struct i915_request *rq)
{
- struct intel_rps *rps = &rq->engine->gt->rps;
+ struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
unsigned long flags;
- if (i915_request_signaled(rq) || !rps->active)
+ if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
return;
/* Serializes with i915_request_retire() */
@@ -779,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq)
!dma_fence_is_signaled_locked(&rq->fence)) {
set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
+
if (!atomic_fetch_inc(&rps->num_waiters) &&
READ_ONCE(rps->cur_freq) < rps->boost_freq)
schedule_work(&rps->work);
@@ -796,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
GEM_BUG_ON(val > rps->max_freq);
GEM_BUG_ON(val < rps->min_freq);
- if (rps->active) {
+ if (intel_rps_is_active(rps)) {
err = rps_set(rps, val, true);
if (err)
return err;
@@ -805,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
*/
- if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ if (intel_rps_has_interrupts(rps)) {
struct intel_uncore *uncore = rps_to_uncore(rps);
set(uncore,
@@ -873,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps)
static bool rps_reset(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
/* force a reset */
rps->power.mode = -1;
rps->last_freq = -1;
if (rps_set(rps, rps->min_freq, true)) {
- DRM_ERROR("Failed to reset RPS to initial values\n");
+ drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
return false;
}
@@ -889,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps)
/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
/* Program defaults and thresholds for RPS */
- if (IS_GEN(i915, 9))
+ if (IS_GEN(gt->i915, 9))
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
GEN9_FREQUENCY(rps->rp1_freq));
- /* 1 second timeout */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- GT_INTERVAL_FROM_US(i915, 1000000));
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -913,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(rps->rp1_freq));
- /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- 100000000 / 128); /* 1 second timeout */
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -930,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
return rps_reset(rps);
}
@@ -1015,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
/* Setting Fixed Bias */
vlv_punit_get(i915);
@@ -1029,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1113,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_CONT);
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+
vlv_punit_get(i915);
/* Setting Fixed Bias */
@@ -1127,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1171,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
return ips->gfx_power + state2;
}
+/*
+ * Return true only if every engine on this RPS's GT reports
+ * intel_engine_supports_stats(); a single engine without stats makes the
+ * result false. With no engines to iterate the result is true.
+ *
+ * intel_rps_enable() uses this to choose the timer-based evaluation over
+ * interrupts.
+ */
+static bool has_busy_stats(struct intel_rps *rps)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ if (!intel_engine_supports_stats(engine))
+ return false;
+ }
+
+ return true;
+}
+
void intel_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ bool enabled = false;
+
+ if (!HAS_RPS(i915))
+ return;
+
+ intel_gt_check_clock_frequency(rps_to_gt(rps));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
- if (IS_CHERRYVIEW(i915))
- rps->enabled = chv_rps_enable(rps);
+ if (rps->max_freq <= rps->min_freq)
+ /* leave disabled, no room for dynamic reclocking */;
+ else if (IS_CHERRYVIEW(i915))
+ enabled = chv_rps_enable(rps);
else if (IS_VALLEYVIEW(i915))
- rps->enabled = vlv_rps_enable(rps);
+ enabled = vlv_rps_enable(rps);
else if (INTEL_GEN(i915) >= 9)
- rps->enabled = gen9_rps_enable(rps);
+ enabled = gen9_rps_enable(rps);
else if (INTEL_GEN(i915) >= 8)
- rps->enabled = gen8_rps_enable(rps);
+ enabled = gen8_rps_enable(rps);
else if (INTEL_GEN(i915) >= 6)
- rps->enabled = gen6_rps_enable(rps);
+ enabled = gen6_rps_enable(rps);
else if (IS_IRONLAKE_M(i915))
- rps->enabled = gen5_rps_enable(rps);
+ enabled = gen5_rps_enable(rps);
+ else
+ MISSING_CASE(INTEL_GEN(i915));
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
- if (!rps->enabled)
+ if (!enabled)
return;
- drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
+ GT_TRACE(rps_to_gt(rps),
+ "min:%x, max:%x, freq:[%d, %d]\n",
+ rps->min_freq, rps->max_freq,
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->max_freq));
+
+ GEM_BUG_ON(rps->max_freq < rps->min_freq);
+ GEM_BUG_ON(rps->idle_freq > rps->max_freq);
+
+ GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
+ GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
- drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
+ if (has_busy_stats(rps))
+ intel_rps_set_timer(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ intel_rps_set_interrupts(rps);
+ else
+ /* Ironlake currently uses intel_ips.ko */ {}
+
+ intel_rps_set_enabled(rps);
}
static void gen6_rps_disable(struct intel_rps *rps)
@@ -1209,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->enabled = false;
+ intel_rps_clear_enabled(rps);
+ intel_rps_clear_interrupts(rps);
+ intel_rps_clear_timer(rps);
if (INTEL_GEN(i915) >= 6)
gen6_rps_disable(rps);
@@ -1285,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
CCK_GPLL_CLOCK_CONTROL,
i915->czclk_freq);
- DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+ drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
+ rps->gpll_ref_freq);
}
static void vlv_rps_init(struct intel_rps *rps)
@@ -1313,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1333;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = vlv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = vlv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = vlv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1364,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1600;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = chv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = chv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = chv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1448,20 +1615,25 @@ static void rps_work(struct work_struct *work)
{
struct intel_rps *rps = container_of(work, typeof(*rps), work);
struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
bool client_boost = false;
int new_freq, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
- if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ if (!pm_iir && !client_boost)
goto out;
mutex_lock(&rps->lock);
+ if (!intel_rps_is_active(rps)) {
+ mutex_unlock(&rps->lock);
+ return;
+ }
pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
@@ -1471,6 +1643,12 @@ static void rps_work(struct work_struct *work)
max = rps->max_freq_softlimit;
if (client_boost)
max = rps->max_freq;
+
+ GT_TRACE(gt,
+ "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
+ pm_iir, yesno(client_boost),
+ adj, new_freq, min, max);
+
if (client_boost && new_freq < rps->boost_freq) {
new_freq = rps->boost_freq;
adj = 0;
@@ -1502,30 +1680,18 @@ static void rps_work(struct work_struct *work)
adj = 0;
}
- rps->last_adj = adj;
-
/*
- * Limit deboosting and boosting to keep ourselves at the extremes
- * when in the respective power modes (i.e. slowly decrease frequencies
- * while in the HIGH_POWER zone and slowly increase frequencies while
- * in the LOW_POWER zone). On idle, we will hit the timeout and drop
- * to the next level quickly, and conversely if busy we expect to
- * hit a waitboost and rapidly switch into max power.
- */
- if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
- (adj > 0 && rps->power.mode == LOW_POWER))
- rps->last_adj = 0;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
+ * sysfs frequency limits may have snuck in while
+ * servicing the interrupt
*/
new_freq += adj;
new_freq = clamp_t(int, new_freq, min, max);
if (intel_rps_set(rps, new_freq)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
+ drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
+ adj = 0;
}
+ rps->last_adj = adj;
mutex_unlock(&rps->lock);
@@ -1545,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
if (unlikely(!events))
return;
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1554,11 +1722,17 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
struct intel_gt *gt = rps_to_gt(rps);
+ u32 events;
- if (pm_iir & rps->pm_events) {
+ events = pm_iir & rps->pm_events;
+ if (events) {
spin_lock(&gt->irq_lock);
- gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
- rps->pm_iir |= pm_iir & rps->pm_events;
+
+ GT_TRACE(gt, "irq events:%x\n", events);
+
+ gen6_gt_pm_mask_irq(gt, events);
+ rps->pm_iir |= events;
+
schedule_work(&rps->work);
spin_unlock(&gt->irq_lock);
}
@@ -1613,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps)
mutex_init(&rps->power.mutex);
INIT_WORK(&rps->work, rps_work);
+ timer_setup(&rps->timer, rps_timer, 0);
atomic_set(&rps->num_waiters, 0);
}
@@ -1641,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps)
sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
&params, NULL);
if (params & BIT(31)) { /* OC supported */
- DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
- (rps->max_freq & 0xff) * 50,
- (params & 0xff) * 50);
+ drm_dbg(&i915->drm,
+ "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
rps->max_freq = params & 0xff;
}
}
@@ -1651,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps)
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
+
+ /* Start in the middle, from here we will autotune based on workload */
+ rps->cur_freq = rps->efficient_freq;
rps->pm_intrmsk_mbz = 0;
@@ -1668,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
+void intel_rps_sanitize(struct intel_rps *rps)
+{
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_disable_interrupts(rps);
+}
+
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -1695,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps)
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else {
- freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
}
return intel_rps_get_cagf(rps, freq);
@@ -1703,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps)
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
- struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
intel_wakeref_t wakeref;
u32 freq = 0;
@@ -1903,3 +2087,7 @@ bool i915_gpu_turbo_disable(void)
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rps.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index dfa98194f3b2..8d3c9d663662 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -13,6 +13,7 @@ struct i915_request;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
+void intel_rps_sanitize(struct intel_rps *rps);
void intel_rps_driver_register(struct intel_rps *rps);
void intel_rps_driver_unregister(struct intel_rps *rps);
@@ -36,4 +37,64 @@ void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+static inline bool intel_rps_is_enabled(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_set_enabled(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_clear_enabled(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline bool intel_rps_is_active(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline void intel_rps_set_active(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_clear_active(struct intel_rps *rps)
+{
+ return test_and_clear_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_has_interrupts(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_set_interrupts(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_clear_interrupts(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline bool intel_rps_uses_timer(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_set_timer(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_clear_timer(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index c2e279154bd5..38083f0402d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -31,6 +31,13 @@ struct intel_rps_ei {
u32 media_c0;
};
+enum {
+ INTEL_RPS_ENABLED = 0,
+ INTEL_RPS_ACTIVE,
+ INTEL_RPS_INTERRUPTS,
+ INTEL_RPS_TIMER,
+};
+
struct intel_rps {
struct mutex lock; /* protects enabling and the worker */
@@ -38,9 +45,12 @@ struct intel_rps {
* work, interrupts_enabled and pm_iir are protected by
* dev_priv->irq_lock
*/
+ struct timer_list timer;
struct work_struct work;
- bool enabled;
- bool active;
+ unsigned long flags;
+
+ ktime_t pm_timestamp;
+ u32 pm_interval;
u32 pm_iir;
/* PM interrupt bits that should never be masked */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 74f793423231..d173271c7397 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
bool subslice_pg = sseu->has_subslice_pg;
- struct intel_sseu ctx_sseu;
u8 slices, subslices;
u32 rpcs = 0;
@@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
/*
* If i915/perf is active, we want a stable powergating configuration
- * on the system.
- *
- * We could choose full enablement, but on ICL we know there are use
- * cases which disable slices for functional, apart for performance
- * reasons. So in this case we select a known stable subset.
+ * on the system. Use the configuration pinned by i915/perf.
*/
- if (!i915->perf.exclusive_stream) {
- ctx_sseu = *req_sseu;
- } else {
- ctx_sseu = intel_sseu_from_device_info(sseu);
-
- if (IS_GEN(i915, 11)) {
- /*
- * We only need subslice count so it doesn't matter
- * which ones we select - just turn off low bits in the
- * amount of half of all available subslices per slice.
- */
- ctx_sseu.subslice_mask =
- ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
- ctx_sseu.slice_mask = 0x1;
- }
- }
+ if (i915->perf.exclusive_stream)
+ req_sseu = &i915->perf.sseu;
- slices = hweight8(ctx_sseu.slice_mask);
- subslices = hweight8(ctx_sseu.subslice_mask);
+ slices = hweight8(req_sseu->slice_mask);
+ subslices = hweight8(req_sseu->subslice_mask);
/*
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
@@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
if (sseu->has_eu_pg) {
u32 val;
- val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
val &= GEN8_RPCS_EU_MIN_MASK;
rpcs |= val;
- val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
val &= GEN8_RPCS_EU_MAX_MASK;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 54e1e55f3c81..4546284fede1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -119,6 +119,15 @@ static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
+static void __rcu_cacheline_free(struct rcu_head *rcu)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(rcu, typeof(*cl), rcu);
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
GEM_BUG_ON(!i915_active_is_idle(&cl->active));
@@ -127,8 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
i915_vma_put(cl->hwsp->vma);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
- i915_active_fini(&cl->active);
- kfree_rcu(cl, rcu);
+ call_rcu(&cl->rcu, __rcu_cacheline_free);
}
__i915_active_call
@@ -192,16 +200,20 @@ static void cacheline_release(struct intel_timeline_cacheline *cl)
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
+ if (!i915_active_acquire_if_busy(&cl->active)) {
+ __idle_cacheline_free(cl);
+ return;
+ }
+
GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
- if (i915_active_is_idle(&cl->active))
- __idle_cacheline_free(cl);
+ i915_active_release(&cl->active);
}
-int intel_timeline_init(struct intel_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
+static int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
{
void *vaddr;
@@ -268,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timeline_fini(struct intel_timeline *timeline)
+static void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
@@ -325,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
return 0;
}
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+ /* Must be pinned to be writable, and no requests in flight. */
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+}
+
void intel_timeline_enter(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
@@ -353,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl)
return;
spin_lock(&timelines->lock);
- if (!atomic_fetch_inc(&tl->active_count))
+ if (!atomic_fetch_inc(&tl->active_count)) {
+ /*
+ * The HWSP is volatile, and may have been lost while inactive,
+ * e.g. across suspend/resume. Be paranoid, and ensure that
+ * the HWSP value matches our seqno so we don't proclaim
+ * the next request as already complete.
+ */
+ intel_timeline_reset_seqno(tl);
list_add_tail(&tl->link, &timelines->active_list);
+ }
spin_unlock(&timelines->lock);
}
@@ -517,6 +544,8 @@ int intel_timeline_read_hwsp(struct i915_request *from,
rcu_read_lock();
cl = rcu_dereference(from->hwsp_cacheline);
+ if (i915_request_completed(from)) /* confirm cacheline is valid */
+ goto unlock;
if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
goto unlock; /* seqno wrapped and completed! */
if (unlikely(i915_request_completed(from)))
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f5b7eade3809..4298b9ac7327 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -31,11 +31,6 @@
#include "i915_syncmap.h"
#include "gt/intel_timeline_types.h"
-int intel_timeline_init(struct intel_timeline *tl,
- struct intel_gt *gt,
- struct i915_vma *hwsp);
-void intel_timeline_fini(struct intel_timeline *tl);
-
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
@@ -84,6 +79,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
void intel_timeline_exit(struct intel_timeline *tl);
void intel_timeline_unpin(struct intel_timeline *tl);
+void intel_timeline_reset_seqno(const struct intel_timeline *tl);
+
int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until,
u32 *hwsp_offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 887e0dc701f7..90a2b9e399b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
-
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
/* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -575,29 +564,46 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* allow headerless messages for preemptible GPGPU context */
WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+
+ /* Wa_1604278689:icl,ehl */
+ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
+ wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER,
+ 0, /* write-only register; skip validation */
+ 0xFFFFFFFF);
+
+ /* Wa_1406306137:icl,ehl */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- u32 val;
-
- /* Wa_1409142259:tgl */
+ /*
+ * Wa_1409142259:tgl
+ * Wa_1409347922:tgl
+ * Wa_1409252684:tgl
+ * Wa_1409217633:tgl
+ * Wa_1409207793:tgl
+ * Wa_1409178076:tgl
+ * Wa_1408979724:tgl
+ */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- /* Wa_1604555607:tgl */
- val = intel_uncore_read(engine->uncore, FF_MODE2);
- val &= ~FF_MODE2_TDS_TIMER_MASK;
- val |= FF_MODE2_TDS_TIMER_128;
/*
- * FIXME: FF_MODE2 register is not readable till TGL B0. We can
- * enable verification of WA from the later steppings, which enables
- * the read of FF_MODE2.
+ * Wa_1604555607:gen12 and Wa_1608008084:gen12
+ * FF_MODE2 register will return the wrong value when read. The default
+ * value for this register is zero for all fields and there are no bit
+ * masks. So instead of doing a RMW we should just write the TDS timer
+ * value for Wa_1604555607.
*/
- wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
- IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
- FF_MODE2_TDS_TIMER_MASK);
+ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128, 0);
+
+ /* WaDisableGPGPUMidThreadPreemption:tgl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}
static void
@@ -820,7 +826,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
GEN10_L3BANK_MASK;
- DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+ drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
} else {
l3_en = ~0;
@@ -829,7 +835,8 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
slice = fls(sseu->slice_mask) - 1;
subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
- DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+ drm_warn(&i915->drm,
+ "No common index found between subslice mask %x and L3 bank mask %x!\n",
intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
drm_WARN_ON(&i915->drm, !subslice);
@@ -844,7 +851,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
}
- DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+ drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
@@ -854,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
/* WaInPlaceDecompressionHang:cnl */
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
@@ -903,11 +904,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
SLICE_UNIT_LEVEL_CLKGATE,
MSCUNIT_CLKGATE_DIS);
- /* Wa_1406680159:icl */
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE,
- GWUNIT_CLKGATE_DIS);
-
/* Wa_1406838659:icl (pre-prod) */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
wa_write_or(wal,
@@ -921,22 +917,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ wa_init_mcr(i915, wal);
+
/* Wa_1409420604:tgl */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SUBSLICE_UNIT_LEVEL_CLKGATE2,
CPSSUNIT_CLKGATE_DIS);
- /* Wa_1409180338:tgl */
+ /* Wa_1607087056:tgl also known as BUG:1409180338 */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
@@ -1251,6 +1252,7 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
case RENDER_CLASS:
/*
* WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ * Wa_1408556865:tgl
*
* This covers 4 registers which are next to one another :
* - PS_INVOCATION_COUNT
@@ -1264,6 +1266,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
/* Wa_1808121037:tgl */
whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
+
+ /* Wa_1806527549:tgl */
+ whitelist_reg(w, HIZ_CHICKEN);
break;
default:
break;
@@ -1330,19 +1335,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
- /* Wa_1606700617:tgl */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
-
- /* Wa_1607138336:tgl */
+ /*
+ * Wa_1607138336:tgl
+ * Wa_1607063988:tgl
+ */
wa_write_or(wal,
GEN9_CTX_PREEMPT_REG,
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
- /* Wa_1607030317:tgl */
- /* Wa_1607186500:tgl */
- /* Wa_1607297627:tgl */
+ /*
+ * Wa_1607030317:tgl
+ * Wa_1607186500:tgl
+ * Wa_1607297627:tgl there are 3 entries for this WA in BSpec, 2
+ * of them say it is fixed on B0, the other one says it is
+ * permanent
+ */
wa_masked_en(wal,
GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
@@ -1361,10 +1368,29 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+ /* Wa_1408615072:tgl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ VSUNIT_CLKGATE_DIS_TGL);
+ }
+
+ if (IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+
+ /* Wa_1409804808:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+ GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+
+ /* Wa_1606700617:tgl */
wa_masked_en(wal,
- GEN7_ROW_CHICKEN2,
- GEN12_DISABLE_EARLY_READ);
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_GEN(i915, 11)) {
@@ -1430,10 +1456,38 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN11_SCRATCH2,
GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
0);
+
+ /* WaEnable32PlaneMode:icl */
+ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
+ GEN11_ENABLE_32_PLANE_MODE);
+
+ /*
+ * Wa_1408615072:icl,ehl (vsunit)
+ * Wa_1407596294:icl,ehl (hsunit)
+ */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+ /* Wa_1407352427:icl,ehl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ PSDUNIT_CLKGATE_DIS);
+
+ /* Wa_1406680159:icl,ehl */
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE,
+ GWUNIT_CLKGATE_DIS);
+
+ /*
+ * Wa_1408767742:icl[a2..forever],ehl[all]
+ * Wa_1605460711:icl[a0..c0]
+ */
+ wa_write_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
}
- if (IS_GEN_RANGE(i915, 9, 11)) {
- /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
+ if (IS_GEN_RANGE(i915, 9, 12)) {
+ /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
wa_masked_en(wal,
GEN7_FF_SLICE_CS_CHICKEN1,
GEN9_FFSC_PERCTX_PREEMPT_CTRL);
@@ -1600,15 +1654,34 @@ err_obj:
return ERR_PTR(err);
}
+static const struct {
+ u32 start;
+ u32 end;
+} mcr_ranges_gen8[] = {
+ { .start = 0x5500, .end = 0x55ff },
+ { .start = 0x7000, .end = 0x7fff },
+ { .start = 0x9400, .end = 0x97ff },
+ { .start = 0xb000, .end = 0xb3ff },
+ { .start = 0xe000, .end = 0xe7ff },
+ {},
+};
+
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
+ int i;
+
+ if (INTEL_GEN(i915) < 8)
+ return false;
+
/*
- * Registers in this range are affected by the MCR selector
+ * Registers in these ranges are affected by the MCR selector
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
- return true;
+ for (i = 0; mcr_ranges_gen8[i].start; i++)
+ if (offset >= mcr_ranges_gen8[i].start &&
+ offset <= mcr_ranges_gen8[i].end)
+ return true;
return false;
}
diff --git a/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
new file mode 100644
index 000000000000..610ca7687735
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC
+ */
+
+static const u32 ivb_clear_kernel[] = {
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+ 0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+ 0x00010220, 0x34001c00, 0x00001400, 0x0000002c,
+ 0x00600001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+ 0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+ 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+ 0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+ 0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+ 0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+ 0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+ 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+ 0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+ 0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+ 0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+ 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+ 0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+ 0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+ 0x00000001, 0x20840021, 0x00000068, 0x00000000,
+ 0x00000001, 0x20880061, 0x00000000, 0x00000003,
+ 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+ 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+ 0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+ 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc,
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+ 0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+ 0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+ 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+ 0x00200001, 0x20400121, 0x00450020, 0x00000000,
+ 0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+ 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+ 0x00800001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20800061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21000061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21200061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21400061, 0x00000000, 0x00000000,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+ 0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+ 0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+ 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8,
+ 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5633515c12e9..b8dd3cbc8696 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -28,7 +28,6 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "mock_engine.h"
#include "selftests/mock_request.h"
@@ -244,9 +243,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(request))
- dma_fence_set_error(&request->fence, -EIO);
-
+ i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
@@ -330,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e874dfaa5316..52af1cee9a94 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
/* Opencode i915_request_add() so we can keep the timeline locked. */
__i915_request_commit(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_queue(rq, NULL);
timeout = i915_request_wait(rq, 0, HZ / 10);
@@ -154,10 +155,7 @@ static int live_context_size(void *arg)
*/
for_each_engine(engine, gt, id) {
- struct {
- struct drm_i915_gem_object *state;
- void *pinned;
- } saved;
+ struct file *saved;
if (!engine->context_size)
continue;
@@ -171,8 +169,7 @@ static int live_context_size(void *arg)
* active state is sufficient, we are only checking that we
* don't use more than we planned.
*/
- saved.state = fetch_and_zero(&engine->default_state);
- saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+ saved = fetch_and_zero(&engine->default_state);
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
@@ -181,8 +178,7 @@ static int live_context_size(void *arg)
engine->context_size -= I915_GTT_PAGE_SIZE;
- engine->pinned_default_state = saved.pinned;
- engine->default_state = saved.state;
+ engine->default_state = saved;
intel_engine_pm_put(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 43d4d589749f..697114dd1f47 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -142,6 +142,24 @@ out:
return err;
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int live_idle_flush(void *arg)
{
struct intel_gt *gt = arg;
@@ -152,9 +170,11 @@ static int live_idle_flush(void *arg)
/* Check that we can flush the idle barriers */
for_each_engine(engine, gt, id) {
- intel_engine_pm_get(engine);
+ unsigned long heartbeat;
+
+ engine_heartbeat_disable(engine, &heartbeat);
err = __live_idle_pulse(engine, intel_engine_flush_barriers);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err)
break;
}
@@ -172,9 +192,11 @@ static int live_idle_pulse(void *arg)
/* Check that heartbeat pulses flush the idle barriers */
for_each_engine(engine, gt, id) {
- intel_engine_pm_get(engine);
+ unsigned long heartbeat;
+
+ engine_heartbeat_disable(engine, &heartbeat);
err = __live_idle_pulse(engine, intel_engine_pulse);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err && err != -ENODEV)
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 09ff8e4f88af..242181a5214c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -7,6 +7,7 @@
#include "selftest_llc.h"
#include "selftest_rc6.h"
+#include "selftest_rps.h"
static int live_gt_resume(void *arg)
{
@@ -52,6 +53,13 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
+ SUBTEST(live_rps_clock_interval),
+ SUBTEST(live_rps_control),
+ SUBTEST(live_rps_frequency_cs),
+ SUBTEST(live_rps_frequency_srm),
+ SUBTEST(live_rps_power),
+ SUBTEST(live_rps_interrupt),
+ SUBTEST(live_rps_dynamic),
SUBTEST(live_gt_resume),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 3e5e6c86e843..2b2efff6e19d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
cancel_rq:
if (err) {
- i915_request_skip(rq, err);
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
}
unpin_hws:
@@ -1640,7 +1640,7 @@ static int igt_reset_engines_atomic(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
igt_global_reset_lock(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 49b93cda04ca..824f99c4cc7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -21,7 +21,8 @@
#include "gem/selftests/mock_context.h"
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
@@ -68,26 +69,84 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
engine->props.heartbeat_interval_ms = saved;
}
+static bool is_active(struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ if (i915_request_on_hold(rq))
+ return true;
+
+ if (i915_request_started(rq))
+ return true;
+
+ return false;
+}
+
static int wait_for_submit(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
timeout += jiffies;
do {
+ bool done = time_after(jiffies, timeout);
+
+ if (i915_request_completed(rq)) /* that was quick! */
+ return 0;
+
+ /* Wait until the HW has acknowledged the submission (or err) */
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
+ return 0;
+
+ if (done)
+ return -ETIME;
+
+ cond_resched();
+ } while (1);
+}
+
+static int wait_for_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+
+ do {
cond_resched();
intel_engine_flush_submission(engine);
if (READ_ONCE(engine->execlists.pending[0]))
continue;
- if (i915_request_is_active(rq))
- return 0;
+ if (i915_request_completed(rq))
+ break;
- if (i915_request_started(rq)) /* that was quick! */
- return 0;
+ if (READ_ONCE(rq->fence.error))
+ break;
} while (time_before(jiffies, timeout));
- return -ETIME;
+ flush_scheduled_work();
+
+ if (rq->fence.error != -EIO) {
+ pr_err("%s: hanging request %llx:%lld not reset\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -EINVAL;
+ }
+
+ /* Give the request a jiffie to complete after flushing the worker */
+ if (i915_request_wait(rq, 0,
+ max(0l, (long)(timeout - jiffies)) + 1) < 0) {
+ pr_err("%s: hanging request %llx:%lld did not complete\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -ETIME;
+ }
+
+ return 0;
}
static int live_sanitycheck(void *arg)
@@ -591,9 +650,9 @@ static int live_error_interrupt(void *arg)
error_repr(p->error[i]));
if (!i915_request_started(client[i])) {
- pr_debug("%s: %s request not stated!\n",
- engine->name,
- error_repr(p->error[i]));
+ pr_err("%s: %s request not started!\n",
+ engine->name,
+ error_repr(p->error[i]));
err = -ETIME;
goto out;
}
@@ -601,9 +660,10 @@ static int live_error_interrupt(void *arg)
/* Kick the tasklet to process the error */
intel_engine_flush_submission(engine);
if (client[i]->fence.error != p->error[i]) {
- pr_err("%s: %s request completed with wrong error code: %d\n",
+ pr_err("%s: %s request (%s) with wrong error code: %d\n",
engine->name,
error_repr(p->error[i]),
+ i915_request_completed(client[i]) ? "completed" : "running",
client[i]->fence.error);
err = -EINVAL;
goto out;
@@ -886,7 +946,7 @@ create_rewinder(struct intel_context *ce,
goto err;
}
- cs = intel_ring_begin(rq, 10);
+ cs = intel_ring_begin(rq, 14);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err;
@@ -898,8 +958,8 @@ create_rewinder(struct intel_context *ce,
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_NEQ_SDD;
- *cs++ = 0;
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = idx;
*cs++ = offset;
*cs++ = 0;
@@ -908,6 +968,11 @@ create_rewinder(struct intel_context *ce,
*cs++ = offset + idx * sizeof(u32);
*cs++ = 0;
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset;
+ *cs++ = 0;
+ *cs++ = idx + 1;
+
intel_ring_advance(rq, cs);
rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -941,7 +1006,7 @@ static int live_timeslice_rewind(void *arg)
for_each_engine(engine, gt, id) {
enum { A1, A2, B1 };
- enum { X = 1, Y, Z };
+ enum { X = 1, Z, Y };
struct i915_request *rq[3] = {};
struct intel_context *ce;
unsigned long heartbeat;
@@ -974,13 +1039,13 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[0] = create_rewinder(ce, NULL, slot, 1);
+ rq[0] = create_rewinder(ce, NULL, slot, X);
if (IS_ERR(rq[0])) {
intel_context_put(ce);
goto err;
}
- rq[1] = create_rewinder(ce, NULL, slot, 2);
+ rq[1] = create_rewinder(ce, NULL, slot, Y);
intel_context_put(ce);
if (IS_ERR(rq[1]))
goto err;
@@ -998,7 +1063,7 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
- rq[2] = create_rewinder(ce, rq[0], slot, 3);
+ rq[2] = create_rewinder(ce, rq[0], slot, Z);
intel_context_put(ce);
if (IS_ERR(rq[2]))
goto err;
@@ -1009,18 +1074,14 @@ static int live_timeslice_rewind(void *arg)
engine->name);
goto err;
}
- GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
- GEM_BUG_ON(!i915_request_is_active(rq[A1]));
- GEM_BUG_ON(!i915_request_is_active(rq[A2]));
- GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
- /* Wait for the timeslice to kick in */
- del_timer(&engine->execlists.timer);
- tasklet_hi_schedule(&engine->execlists.tasklet);
- intel_engine_flush_submission(engine);
-
+ if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+ /* Wait for the timeslice to kick in */
+ del_timer(&engine->execlists.timer);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ intel_engine_flush_submission(engine);
+ }
/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
@@ -1185,8 +1246,14 @@ static int live_timeslice_queue(void *arg)
if (err)
goto err_rq;
- intel_engine_flush_submission(engine);
+ /* Wait until we ack the release_queue and start timeslicing */
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (READ_ONCE(engine->execlists.pending[0]));
+
if (!READ_ONCE(engine->execlists.timer.expires) &&
+ execlists_active(&engine->execlists) == rq &&
!i915_request_completed(rq)) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -1805,14 +1872,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -1870,10 +1932,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
goto out;
igt_spinner_end(&arg->a.spin);
- if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[1], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != 0) {
pr_err("Normal inflight0 request did not complete\n");
@@ -1953,10 +2014,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[2], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != -EIO) {
pr_err("Cancelled inflight0 request did not report -EIO\n");
@@ -1994,6 +2054,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
return 0;
+ if (!intel_has_reset_engine(arg->engine->gt))
+ return 0;
+
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
@@ -2014,14 +2077,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -2109,7 +2167,7 @@ static int live_suppress_self_preempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0; /* presume black blox */
if (intel_vgpu_active(gt->i915))
@@ -2599,7 +2657,7 @@ static int create_gang(struct intel_engine_cs *engine,
if (IS_ERR(rq))
goto err_obj;
- rq->batch = vma;
+ rq->batch = i915_vma_get(vma);
i915_request_get(rq);
i915_vma_lock(vma);
@@ -2623,6 +2681,7 @@ static int create_gang(struct intel_engine_cs *engine,
return 0;
err_rq:
+ i915_vma_put(rq->batch);
i915_request_put(rq);
err_obj:
i915_gem_object_put(obj);
@@ -2719,6 +2778,7 @@ static int live_preempt_gang(void *arg)
err = -ETIME;
}
+ i915_vma_put(rq->batch);
i915_request_put(rq);
rq = n;
}
@@ -2732,6 +2792,331 @@ static int live_preempt_gang(void *arg)
return 0;
}
+/*
+ * Build the user batch for one GPR client.
+ *
+ * A 4096-byte internal object is bound into the same address space as
+ * @result and pinned with PIN_USER. The batch sets GPR(0) to 1 and then,
+ * for each GPR i in [1, NUM_GPR), adds GPR(0) to GPR(i), stores the GPR to
+ * @result at byte offset @offset + i * sizeof(u32), and emits a semaphore
+ * wait on the first dword of @result with data i.
+ *
+ * Returns the vma, still pinned, or an ERR_PTR on failure.
+ */
+static struct i915_vma *
+create_gpr_user(struct intel_engine_cs *engine,
+ struct i915_vma *result,
+ unsigned int offset)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+ int i;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, result->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ /* NOTE(review): the vma is not unpinned on this error path - confirm intended */
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(vma);
+ return ERR_CAST(cs);
+ }
+
+ /* All GPR are clear for new contexts. We use GPR(0) as a constant */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, 0);
+ *cs++ = 1;
+
+ for (i = 1; i < NUM_GPR; i++) {
+ u64 addr;
+
+ /*
+ * Perform: GPR[i]++
+ *
+ * As we read and write into the context saved GPR[i], if
+ * we restart this batch buffer from an earlier point, we
+ * will repeat the increment and store a value > 1.
+ */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
+
+ /* Each GPR is 2 dwords wide, hence register index 2 * i */
+ addr = result->node.start + offset + i * sizeof(*cs);
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = CS_GPR(engine, 2 * i);
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ /* Presumably stalls until the first dword of result reaches i - TODO confirm */
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = i;
+ *cs++ = lower_32_bits(result->node.start);
+ *cs++ = upper_32_bits(result->node.start);
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return vma;
+}
+
+/*
+ * Allocate an internal object of @sz bytes and pin it into the GGTT of @gt.
+ *
+ * Returns the pinned vma, or an ERR_PTR on failure.
+ */
+static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
+{
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *global;
+	int ret;
+
+	bo = i915_gem_object_create_internal(gt->i915, sz);
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	global = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
+	if (IS_ERR(global)) {
+		i915_gem_object_put(bo);
+		return global;
+	}
+
+	ret = i915_ggtt_pin(global, 0, 0);
+	if (!ret)
+		return global;
+
+	/* Pinning failed: drop the vma and report why */
+	i915_vma_put(global);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create one GPR client: a fresh context on @engine running the batch from
+ * create_gpr_user(), with its results written into @global at @offset.
+ *
+ * The object backing @global is bound into the new context's address space
+ * and pinned while the batch and request are built. On success the request
+ * is returned with an extra reference that the caller must put. On failure
+ * an ERR_PTR is returned; a request that was already created is still added
+ * before returning.
+ */
+static struct i915_request *
+create_gpr_client(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ unsigned int offset)
+{
+ struct i915_vma *batch, *vma;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ vma = i915_vma_instance(global->obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_ce;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_ce;
+
+ batch = create_gpr_user(engine, vma, offset);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_vma;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+
+ /* Each step below is skipped once an earlier one has failed */
+ i915_vma_lock(batch);
+ if (!err)
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(batch);
+ i915_vma_unpin(batch);
+
+ /* Only take the caller's reference if the request is going to be returned */
+ if (!err)
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+out_batch:
+ i915_vma_put(batch);
+out_vma:
+ i915_vma_unpin(vma);
+out_ce:
+ intel_context_put(ce);
+ return err ? ERR_PTR(err) : rq;
+}
+
+/*
+ * Submit a kernel request on @engine that stores @id into the first dword
+ * of @global (via its GGTT offset), raise it to I915_PRIORITY_MAX, and wait
+ * up to HZ / 2 for it to complete.
+ *
+ * Returns 0 on success, -ETIME if the wait fails, or the error from creating
+ * the request or reserving ring space.
+ */
+static int preempt_user(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ int id)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_PRIORITY_MAX
+ };
+ struct i915_request *rq;
+ int err = 0;
+ u32 *cs;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(global);
+ *cs++ = 0;
+ *cs++ = id;
+
+ intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ /* Boost the priority only after the request has been added */
+ engine->schedule(rq, &attr);
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+/*
+ * Selftest: verify that preempting inside a user batch resumes from the
+ * current instruction rather than replaying earlier ones.
+ *
+ * For each suitable engine, three GPR clients are started and then
+ * preempted NUM_GPR times by preempt_user(). Every GPR value a client
+ * stores must be exactly 1; anything else is reported as -EINVAL.
+ *
+ * Returns 0 on success (or when the test does not apply), otherwise a
+ * negative error code.
+ */
+static int live_preempt_user(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct i915_vma *global;
+	enum intel_engine_id id;
+	u32 *result;
+	int err = 0;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	/*
+	 * In our other tests, we look at preemption in carefully
+	 * controlled conditions in the ringbuffer. Since most of the
+	 * time is spent in user batches, most of our preemptions naturally
+	 * occur there. We want to verify that when we preempt inside a batch
+	 * we continue on from the current instruction and do not roll back
+	 * to the start, or another earlier arbitration point.
+	 *
+	 * To verify this, we create a batch which is a mixture of
+	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
+	 * a few preempting contexts thrown into the mix, we look for any
+	 * repeated instructions (which show up as incorrect values).
+	 */
+
+	global = create_global(gt, 4096);
+	if (IS_ERR(global))
+		return PTR_ERR(global);
+
+	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
+	if (IS_ERR(result)) {
+		i915_vma_unpin_and_release(&global, 0);
+		return PTR_ERR(result);
+	}
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *client[3] = {};
+		struct igt_live_test t;
+		int i;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
+			continue; /* we need per-context GPR */
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+
+		memset(result, 0, 4096);
+
+		for (i = 0; i < ARRAY_SIZE(client); i++) {
+			struct i915_request *rq;
+
+			rq = create_gpr_client(engine, global,
+					       NUM_GPR * i * sizeof(u32));
+			if (IS_ERR(rq)) {
+				/*
+				 * Propagate the failure; otherwise err stays
+				 * 0 and the subtest would report success.
+				 */
+				err = PTR_ERR(rq);
+				goto end_test;
+			}
+
+			client[i] = rq;
+		}
+
+		/* Continuously preempt the set of 3 running contexts */
+		for (i = 1; i <= NUM_GPR; i++) {
+			err = preempt_user(engine, global, i);
+			if (err)
+				goto end_test;
+		}
+
+		if (READ_ONCE(result[0]) != NUM_GPR) {
+			pr_err("%s: Failed to release semaphore\n",
+			       engine->name);
+			err = -EIO;
+			goto end_test;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(client); i++) {
+			int gpr;
+
+			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
+				err = -ETIME;
+				goto end_test;
+			}
+
+			for (gpr = 1; gpr < NUM_GPR; gpr++) {
+				if (result[NUM_GPR * i + gpr] != 1) {
+					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
+					       engine->name,
+					       i, gpr, result[NUM_GPR * i + gpr]);
+					err = -EINVAL;
+					goto end_test;
+				}
+			}
+		}
+
+end_test:
+		/* Clients are filled in order, so stop at the first empty slot */
+		for (i = 0; i < ARRAY_SIZE(client); i++) {
+			if (!client[i])
+				break;
+
+			i915_request_put(client[i]);
+		}
+
+		/* Flush the semaphores on error */
+		smp_store_mb(result[0], -1);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
+	return err;
+}
+
static int live_preempt_timeout(void *arg)
{
struct intel_gt *gt = arg;
@@ -3224,7 +3609,7 @@ static int live_virtual_engine(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for_each_engine(engine, gt, id) {
@@ -3357,7 +3742,7 @@ static int live_virtual_mask(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3499,7 +3884,7 @@ static int live_virtual_preserved(void *arg)
* are preserved.
*/
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
/* As we use CS_GPR we cannot run before they existed on all engines. */
@@ -3729,7 +4114,7 @@ static int live_virtual_bond(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3890,7 +4275,7 @@ static int live_virtual_reset(void *arg)
* forgotten.
*/
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
if (!intel_has_reset_engine(gt))
@@ -3939,6 +4324,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_timeout),
+ SUBTEST(live_preempt_user),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
@@ -3956,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
-static void hexdump(const void *buf, size_t len)
-{
- const size_t rowsize = 8 * sizeof(u32);
- const void *prev = NULL;
- bool skip = false;
- size_t pos;
-
- for (pos = 0; pos < len; pos += rowsize) {
- char line[128];
-
- if (prev && !memcmp(prev, buf + pos, rowsize)) {
- if (!skip) {
- pr_info("*\n");
- skip = true;
- }
- continue;
- }
-
- WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
- rowsize, sizeof(u32),
- line, sizeof(line),
- false) >= sizeof(line));
- pr_info("[%04zx] %s\n", pos, line);
-
- prev = buf + pos;
- skip = false;
- }
-}
-
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
const u32 offset =
@@ -4015,6 +4372,32 @@ static int emit_semaphore_signal(struct intel_context *ce, void *slot)
return 0;
}
+/*
+ * Submit a kernel request on ce->engine, ordered after the last request on
+ * @ce's timeline (if there is one), and wait up to @timeout for it.
+ *
+ * Returns 0 on success, -ETIME if the wait fails, or the error from creating
+ * the request.
+ */
+static int context_flush(struct intel_context *ce, long timeout)
+{
+	struct dma_fence *last;
+	struct i915_request *rq;
+	int ret;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	last = i915_active_fence_get(&ce->timeline->last_request);
+	if (last) {
+		i915_request_await_dma_fence(rq, last);
+		dma_fence_put(last);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+
+	ret = i915_request_wait(rq, 0, timeout) < 0 ? -ETIME : 0;
+	i915_request_put(rq);
+
+	rmb(); /* We know the request is written, make sure all state is too! */
+	return ret;
+}
+
static int live_lrc_layout(void *arg)
{
struct intel_gt *gt = arg;
@@ -4040,13 +4423,12 @@ static int live_lrc_layout(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
engine->kernel_context,
@@ -4107,13 +4489,13 @@ static int live_lrc_layout(void *arg)
if (err) {
pr_info("%s: HW register image:\n", engine->name);
- hexdump(hw, PAGE_SIZE);
+ igt_hexdump(hw, PAGE_SIZE);
pr_info("%s: SW register image:\n", engine->name);
- hexdump(lrc, PAGE_SIZE);
+ igt_hexdump(lrc, PAGE_SIZE);
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
if (err)
break;
}
@@ -4182,10 +4564,35 @@ static int live_lrc_fixed(void *arg)
"BB_STATE"
},
{
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
+ lrc_ring_wa_bb_per_ctx(engine),
+ "RING_BB_PER_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
+ lrc_ring_indirect_ptr(engine),
+ "RING_INDIRECT_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
+ lrc_ring_indirect_offset(engine),
+ "RING_INDIRECT_CTX_OFFSET"
+ },
+ {
i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
CTX_TIMESTAMP - 1,
"RING_CTX_TIMESTAMP"
},
+ {
+ i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
+ lrc_ring_gpr0(engine),
+ "RING_CS_GPR0"
+ },
+ {
+ i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
+ lrc_ring_cmd_buf_cctl(engine),
+ "RING_CMD_BUF_CCTL"
+ },
{ },
}, *t;
u32 *hw;
@@ -4193,13 +4600,12 @@ static int live_lrc_fixed(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
for (t = tbl; t->name; t++) {
int dw = find_offset(hw, t->reg);
@@ -4215,7 +4621,7 @@ static int live_lrc_fixed(void *arg)
}
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
}
return err;
@@ -4638,18 +5044,10 @@ static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
wmb();
}
- if (i915_request_wait(rq, 0, HZ / 2) < 0) {
- err = -ETIME;
- goto err;
- }
-
- /* and wait for switch to kernel */
- if (igt_flush_test(arg->engine->i915)) {
- err = -EIO;
+ /* And wait for switch to kernel (to save our context to memory) */
+ err = context_flush(arg->ce[0], HZ / 2);
+ if (err)
goto err;
- }
-
- rmb();
if (!timestamp_advanced(arg->poison, slot[1])) {
pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
@@ -4674,9 +5072,9 @@ err:
static int live_lrc_timestamp(void *arg)
{
+ struct lrc_timestamp data = {};
struct intel_gt *gt = arg;
enum intel_engine_id id;
- struct lrc_timestamp data;
const u32 poison[] = {
0,
S32_MAX,
@@ -4748,6 +5146,860 @@ err:
return 0;
}
+/*
+ * Allocate an internal object of @size bytes, bind it into @vm and pin it
+ * with PIN_USER.
+ *
+ * Returns the pinned vma, or an ERR_PTR on failure.
+ */
+static struct i915_vma *
+create_user_vma(struct i915_address_space *vm, unsigned long size)
+{
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *user;
+	int ret;
+
+	bo = i915_gem_object_create_internal(vm->i915, size);
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	user = i915_vma_instance(bo, vm, NULL);
+	if (IS_ERR(user)) {
+		i915_gem_object_put(bo);
+		return user;
+	}
+
+	ret = i915_vma_pin(user, 0, 0, PIN_USER);
+	if (!ret)
+		return user;
+
+	/* Pinning failed: release the object and report why */
+	i915_gem_object_put(bo);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Build a batch, in the address space of @ce, that stores every register
+ * named by the MI_INSTR(0x22, 0) commands (the LRI lists, per the comment
+ * in live_lrc_isolation) of the engine's default context image into
+ * @scratch.
+ *
+ * Registers are written to consecutive 4-byte slots starting at the
+ * beginning of @scratch. Returns the batch vma, or an ERR_PTR on failure.
+ */
+static struct i915_vma *
+store_context(struct intel_context *ce, struct i915_vma *scratch)
+{
+ struct i915_vma *batch;
+ u32 dw, x, *cs, *hw;
+ u32 *defaults;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ x = 0;
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ /* A zero dword is skipped one at a time */
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ /* Any other command is stepped over using its length field */
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ /* Skip the header; the payload is (register, value) pairs */
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = hw[dw];
+ *cs++ = lower_32_bits(scratch->node.start + x);
+ *cs++ = upper_32_bits(scratch->node.start + x);
+
+ dw += 2;
+ x += 4;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+/*
+ * Under the vma lock, make @rq await the object behind @vma and then mark
+ * @vma as active on @rq, passing @flags to both steps.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int move_to_active(struct i915_request *rq,
+			  struct i915_vma *vma,
+			  unsigned int flags)
+{
+	int ret;
+
+	i915_vma_lock(vma);
+
+	ret = i915_request_await_object(rq, vma->obj, flags);
+	if (ret)
+		goto unlock;
+
+	ret = i915_vma_move_to_active(vma, rq, flags);
+
+unlock:
+	i915_vma_unlock(vma);
+	return ret;
+}
+
+/*
+ * Submit a request on @ce that records the context registers twice: into
+ * @before, then, after waiting on the semaphore @sema, into @after.
+ *
+ * The request runs the first store_context() batch with arbitration
+ * disabled, re-enables arbitration and waits on @sema (addressed through
+ * the engine's status page), then runs the second batch with arbitration
+ * disabled again. *sema is reset to 0 before the request is added.
+ *
+ * Returns the request with an extra reference for the caller, or an
+ * ERR_PTR. Both batch vmas are put before returning on every path.
+ */
+static struct i915_request *
+record_registers(struct intel_context *ce,
+ struct i915_vma *before,
+ struct i915_vma *after,
+ u32 *sema)
+{
+ struct i915_vma *b_before, *b_after;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+
+ b_before = store_context(ce, before);
+ if (IS_ERR(b_before))
+ return ERR_CAST(b_before);
+
+ b_after = store_context(ce, after);
+ if (IS_ERR(b_after)) {
+ rq = ERR_CAST(b_after);
+ goto err_before;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto err_after;
+
+ err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_before, 0);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_after, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 14);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ /* First sample: run the "before" batch with arbitration disabled */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_before->node.start);
+ *cs++ = upper_32_bits(b_before->node.start);
+
+ /* Re-enable arbitration and wait on the semaphore (data 0, NEQ compare) */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ /* Second sample: run the "after" batch with arbitration disabled */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_after->node.start);
+ *cs++ = upper_32_bits(b_after->node.start);
+
+ intel_ring_advance(rq, cs);
+
+ WRITE_ONCE(*sema, 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+err_after:
+ i915_vma_put(b_after);
+err_before:
+ i915_vma_put(b_before);
+ return rq;
+
+err_rq:
+ /* The request must still be added before it is abandoned */
+ i915_request_add(rq);
+ rq = ERR_PTR(err);
+ goto err_after;
+}
+
+/*
+ * Build a batch, in the address space of @ce, that loads @poison into every
+ * register named by the MI_INSTR(0x22, 0) commands of the engine's default
+ * context image. One MI_LOAD_REGISTER_IMM is emitted per command found.
+ *
+ * Returns the batch vma, or an ERR_PTR on failure.
+ */
+static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
+{
+ struct i915_vma *batch;
+ u32 dw, *cs, *hw;
+ u32 *defaults;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ /* A zero dword is skipped one at a time */
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ /* Any other command is stepped over using its length field */
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ /* Re-emit the register list with every value replaced by poison */
+ dw++;
+ len = (len + 1) / 2;
+ *cs++ = MI_LOAD_REGISTER_IMM(len);
+ while (len--) {
+ *cs++ = hw[dw];
+ *cs++ = poison;
+ dw += 2;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+/*
+ * Submit a request on @ce that runs the load_context() batch for @poison
+ * with arbitration disabled and then stores 1 into @sema (addressed through
+ * the engine's status page). The request gets I915_PRIORITY_BARRIER.
+ *
+ * The request is not waited upon. Returns 0 on success or a negative error
+ * code; a request that was created is always added.
+ */
+static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
+{
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ u32 *cs;
+ int err;
+
+ batch = load_context(ce, poison);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = move_to_active(rq, batch, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(batch->node.start);
+ *cs++ = upper_32_bits(batch->node.start);
+
+ /* Signal the semaphore once the poison batch has run */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ /* The success path falls through here with err == 0 */
+err_rq:
+ i915_request_add(rq);
+err_batch:
+ i915_vma_put(batch);
+ return err;
+}
+
+/* True when the two samples @a and @b of a register differ. */
+static bool is_moving(u32 a, u32 b)
+{
+	return !(a == b);
+}
+
+/*
+ * Compare the register samples in @result against the reference samples in
+ * @ref, walking the same register list (taken from the engine's default
+ * context image) that store_context() walked to produce them.
+ *
+ * A register is checked only if its two reference samples are equal. It is
+ * reported if either result sample differs from the matching reference
+ * sample, except for the registers at offsets 0x30 and 0x34. All mismatches
+ * are logged, together with @poison and the value in @ce's context image.
+ *
+ * Returns 0 if nothing mismatched, -EINVAL if anything did, or the error
+ * from mapping one of the objects.
+ */
+static int compare_isolation(struct intel_engine_cs *engine,
+ struct i915_vma *ref[2],
+ struct i915_vma *result[2],
+ struct intel_context *ce,
+ u32 poison)
+{
+ u32 x, dw, *hw, *lrc;
+ u32 *A[2], *B[2];
+ u32 *defaults;
+ int err = 0;
+
+ A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
+ if (IS_ERR(A[0]))
+ return PTR_ERR(A[0]);
+
+ A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
+ if (IS_ERR(A[1])) {
+ err = PTR_ERR(A[1]);
+ goto err_A0;
+ }
+
+ B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
+ if (IS_ERR(B[0])) {
+ err = PTR_ERR(B[0]);
+ goto err_A1;
+ }
+
+ B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
+ if (IS_ERR(B[1])) {
+ err = PTR_ERR(B[1]);
+ goto err_B0;
+ }
+
+ lrc = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(engine->i915));
+ if (IS_ERR(lrc)) {
+ err = PTR_ERR(lrc);
+ goto err_B1;
+ }
+ lrc += LRC_STATE_OFFSET / sizeof(*hw);
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ err = -ENOMEM;
+ goto err_lrc;
+ }
+
+ /* x indexes the recorded samples, dw indexes the context image */
+ x = 0;
+ dw = 0;
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ /* Only registers whose reference samples agree can be judged */
+ if (!is_moving(A[0][x], A[1][x]) &&
+ (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
+ switch (hw[dw] & 4095) {
+ case 0x30: /* RING_HEAD */
+ case 0x34: /* RING_TAIL */
+ break;
+
+ default:
+ pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
+ engine->name, dw,
+ hw[dw], hw[dw + 1],
+ A[0][x], B[0][x], B[1][x],
+ poison, lrc[dw + 1]);
+ err = -EINVAL;
+ }
+ }
+ dw += 2;
+ x++;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
+ i915_gem_object_unpin_map(ce->state->obj);
+err_B1:
+ i915_gem_object_unpin_map(result[1]->obj);
+err_B0:
+ i915_gem_object_unpin_map(result[0]->obj);
+err_A1:
+ i915_gem_object_unpin_map(ref[1]->obj);
+err_A0:
+ i915_gem_object_unpin_map(ref[0]->obj);
+ return err;
+}
+
+/*
+ * Run one isolation pass on @engine with the given @poison value.
+ *
+ * Context A first records its registers twice with nothing else running
+ * (the reference). A then records them again while context B runs
+ * poison_registers(), which also releases A's semaphore. The two recordings
+ * are compared by compare_isolation().
+ *
+ * Returns 0 on success, -ETIME if a recording request does not complete
+ * within HZ / 2, or another negative error code.
+ */
+static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
+{
+ u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
+ struct i915_vma *ref[2], *result[2];
+ struct intel_context *A, *B;
+ struct i915_request *rq;
+ int err;
+
+ A = intel_context_create(engine);
+ if (IS_ERR(A))
+ return PTR_ERR(A);
+
+ B = intel_context_create(engine);
+ if (IS_ERR(B)) {
+ err = PTR_ERR(B);
+ goto err_A;
+ }
+
+ ref[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[0])) {
+ err = PTR_ERR(ref[0]);
+ goto err_B;
+ }
+
+ ref[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[1])) {
+ err = PTR_ERR(ref[1]);
+ goto err_ref0;
+ }
+
+ rq = record_registers(A, ref[0], ref[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ref1;
+ }
+
+ /* Reference run: release the semaphore ourselves */
+ WRITE_ONCE(*sema, 1);
+ wmb();
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ref1;
+ }
+ i915_request_put(rq);
+
+ result[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[0])) {
+ err = PTR_ERR(result[0]);
+ goto err_ref1;
+ }
+
+ result[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[1])) {
+ err = PTR_ERR(result[1]);
+ goto err_result0;
+ }
+
+ rq = record_registers(A, result[0], result[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_result1;
+ }
+
+ /* Poisoned run: B's request writes the semaphore after poisoning */
+ err = poison_registers(B, poison, sema);
+ if (err) {
+ /* Release A's semaphore wait before bailing out */
+ WRITE_ONCE(*sema, -1);
+ i915_request_put(rq);
+ goto err_result1;
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_result1;
+ }
+ i915_request_put(rq);
+
+ err = compare_isolation(engine, ref, result, A, poison);
+
+err_result1:
+ i915_vma_put(result[1]);
+err_result0:
+ i915_vma_put(result[0]);
+err_ref1:
+ i915_vma_put(ref[1]);
+err_ref0:
+ i915_vma_put(ref[0]);
+err_B:
+ intel_context_put(B);
+err_A:
+ intel_context_put(A);
+ return err;
+}
+
+/*
+ * Engines on which the isolation test is skipped: the copy engine on gen9
+ * and the render engine on gen11.
+ */
+static bool skip_isolation(const struct intel_engine_cs *engine)
+{
+	switch (engine->class) {
+	case COPY_ENGINE_CLASS:
+		return INTEL_GEN(engine->i915) == 9;
+	case RENDER_CLASS:
+		return INTEL_GEN(engine->i915) == 11;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Selftest: run __lrc_isolation() on every engine of the gt, once with each
+ * poison value and once with its bitwise complement.
+ *
+ * The first error seen is remembered, but the remaining poison values are
+ * still run. The engine loop stops early only if igt_flush_test() fails,
+ * in which case -EIO is returned.
+ */
+static int live_lrc_isolation(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const u32 poison[] = {
+ STACK_MAGIC,
+ 0x3a3a3a3a,
+ 0x5c5c5c5c,
+ 0xffffffff,
+ 0xffff0000,
+ };
+ int err = 0;
+
+ /*
+ * Our goal is try and verify that per-context state cannot be
+ * tampered with by another non-privileged client.
+ *
+ * We take the list of context registers from the LRI in the default
+ * context image and attempt to modify that list from a remote context.
+ */
+
+ for_each_engine(engine, gt, id) {
+ int i;
+
+ /* Just don't even ask */
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
+ skip_isolation(engine))
+ continue;
+
+ intel_engine_pm_get(engine);
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ int result;
+
+ result = __lrc_isolation(engine, poison[i]);
+ if (result && !err)
+ err = result;
+
+ result = __lrc_isolation(engine, ~poison[i]);
+ if (result && !err)
+ err = result;
+ }
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
+/*
+ * Submit an empty request on @ce and wait up to HZ / 5 for it to complete.
+ *
+ * Returns 0 on success, -ETIME if the wait fails, or the error from creating
+ * the request.
+ */
+static int indirect_ctx_submit_req(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int ret;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	ret = i915_request_wait(rq, 0, HZ / 5) < 0 ? -ETIME : 0;
+	i915_request_put(rq);
+
+	return ret;
+}
+
+#define CTX_BB_CANARY_OFFSET (3 * 1024)
+#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
+
+/*
+ * Emit four dwords at @cs forming a store-register-to-memory command that
+ * copies RING_START into the canary slot of @ce's wa bb area.
+ *
+ * Returns the advanced command-stream pointer.
+ */
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ /* NOTE(review): MI_LRI_LRM_CS_MMIO presumably makes the register offset engine-relative, hence RING_START(0) -- confirm */
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(RING_START(0));
+ /* Destination: GGTT address of the context state + wa bb offset + canary offset */
+ *cs++ = i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce) +
+ CTX_BB_CANARY_OFFSET;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static void
+indirect_ctx_bb_setup(struct intel_context *ce)
+{
+ u32 *cs = context_indirect_bb(ce);
+
+ cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+}
+
+/*
+ * Compare the canary dword in @ce's wa bb area against the CTX_RING_START
+ * entry of its register state.
+ *
+ * Returns true when they match; otherwise logs both values and returns
+ * false.
+ */
+static bool check_ring_start(struct intel_context *ce)
+{
+	/* Step back from the register state to the image base, then on to the wa bb. */
+	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+
+	if (ctx_bb[CTX_BB_CANARY_INDEX] != ce->lrc_reg_state[CTX_RING_START]) {
+		pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
+		       ctx_bb[CTX_BB_CANARY_INDEX],
+		       ce->lrc_reg_state[CTX_RING_START]);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Run a request on @ce and then verify the canary against its ring start.
+ *
+ * Returns 0 on success, the submission error if the request failed, or
+ * -EINVAL if the canary does not match.
+ */
+static int indirect_ctx_bb_check(struct intel_context *ce)
+{
+	int err = indirect_ctx_submit_req(ce);
+
+	if (err)
+		return err;
+
+	return check_ring_start(ce) ? 0 : -EINVAL;
+}
+
+static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+{
+ struct intel_context *a, *b;
+ int err;
+
+ a = intel_context_create(engine);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+ err = intel_context_pin(a);
+ if (err)
+ goto put_a;
+
+ b = intel_context_create(engine);
+ if (IS_ERR(b)) {
+ err = PTR_ERR(b);
+ goto unpin_a;
+ }
+ err = intel_context_pin(b);
+ if (err)
+ goto put_b;
+
+ /* We use the already reserved extra page in context state */
+ if (!a->wa_bb_page) {
+ GEM_BUG_ON(b->wa_bb_page);
+ GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+ goto unpin_b;
+ }
+
+ /*
+ * In order to test that our per context bb is truly per context,
+ * and executes at the intended spot on context restoring process,
+ * make the batch store the ring start value to memory.
+ * As ring start is restored prior to starting the indirect ctx bb and
+ * as it will be different for each context, it suits this purpose.
+ */
+ indirect_ctx_bb_setup(a);
+ indirect_ctx_bb_setup(b);
+
+ err = indirect_ctx_bb_check(a);
+ if (err)
+ goto unpin_b;
+
+ err = indirect_ctx_bb_check(b);
+
+unpin_b:
+ intel_context_unpin(b);
+put_b:
+ intel_context_put(b);
+unpin_a:
+ intel_context_unpin(a);
+put_a:
+ intel_context_put(a);
+
+ return err;
+}
+
+/*
+ * Run the indirect ctx bb check on every engine of the GT passed in @arg,
+ * holding an engine-pm reference around each run.
+ *
+ * Stops at the first engine that fails. A failing igt_flush_test() is
+ * reported as -EIO and takes precedence over the per-engine result.
+ */
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, gt, id) {
+		int err;
+
+		intel_engine_pm_get(engine);
+		err = __live_lrc_indirect_ctx_bb(engine);
+		intel_engine_pm_put(engine);
+
+		if (igt_flush_test(gt->i915))
+			return -EIO;
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void garbage_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ tasklet_disable(&engine->execlists.tasklet);
+
+ if (!rq->fence.error)
+ intel_engine_reset(engine, NULL);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+/*
+ * Pin @ce, overwrite its register state with pseudo-random bytes drawn from
+ * @prng, then create and submit a request on it.
+ *
+ * Returns the submitted request with an extra reference taken for the
+ * caller, or an ERR_PTR if pinning or request creation fails.
+ */
+static struct i915_request *garbage(struct intel_context *ce,
+ struct rnd_state *prng)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_context_pin(ce);
+ if (err)
+ return ERR_PTR(err);
+
+ /* Scribble (context_size - LRC_STATE_OFFSET) bytes starting at lrc_reg_state */
+ prandom_bytes_state(prng,
+ ce->lrc_reg_state,
+ ce->engine->context_size -
+ LRC_STATE_OFFSET);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ /*
+ * NOTE(review): the pin taken above is only dropped on the error path
+ * below; confirm where it is released after a successful submission.
+ */
+ i915_request_get(rq);
+ i915_request_add(rq);
+ return rq;
+
+err_unpin:
+ intel_context_unpin(ce);
+ return ERR_PTR(err);
+}
+
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct intel_context *ce;
+ struct i915_request *hang;
+ int err = 0;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ hang = garbage(ce, prng);
+ if (IS_ERR(hang)) {
+ err = PTR_ERR(hang);
+ goto err_ce;
+ }
+
+ if (wait_for_submit(engine, hang, HZ / 2)) {
+ i915_request_put(hang);
+ err = -ETIME;
+ goto err_ce;
+ }
+
+ intel_context_set_banned(ce);
+ garbage_reset(engine, hang);
+
+ intel_engine_flush_submission(engine);
+ if (!hang->fence.error) {
+ i915_request_put(hang);
+ pr_err("%s: corrupted context was not reset\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_ce;
+ }
+
+ if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+ pr_err("%s: corrupted context did not recover\n",
+ engine->name);
+ i915_request_put(hang);
+ err = -EIO;
+ goto err_ce;
+ }
+ i915_request_put(hang);
+
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_garbage(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Verify that we can recover if one context state is completely
+ * corrupted.
+ */
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ I915_RND_STATE(prng);
+ int err = 0, i;
+
+ if (!intel_has_reset_engine(engine->gt))
+ continue;
+
+ intel_engine_pm_get(engine);
+ for (i = 0; i < 3; i++) {
+ err = __lrc_garbage(engine, &prng);
+ if (err)
+ break;
+ }
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
struct intel_context *ce;
@@ -4845,8 +6097,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_fixed),
SUBTEST(live_lrc_state),
SUBTEST(live_lrc_gpr),
+ SUBTEST(live_lrc_isolation),
SUBTEST(live_lrc_timestamp),
+ SUBTEST(live_lrc_garbage),
SUBTEST(live_pphwsp_runtime),
+ SUBTEST(live_lrc_indirect_ctx_bb),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 5f7e2dcf5686..2dc460624bbc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -11,12 +11,30 @@
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
+#include "selftests/librapl.h"
+
+/*
+ * Sum the RC6 residency in nanoseconds, adding the RC6p and RC6pp counters
+ * on platforms that report having them.
+ */
+static u64 rc6_residency(struct intel_rc6 *rc6)
+{
+	/* XXX VLV_GT_MEDIA_RC6? */
+	u64 total_ns = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(rc6_to_i915(rc6)))
+		total_ns += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(rc6_to_i915(rc6)))
+		total_ns += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp);
+
+	return total_ns;
+}
int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
struct intel_rc6 *rc6 = &gt->rc6;
+ u64 rc0_power, rc6_power;
intel_wakeref_t wakeref;
+ ktime_t dt;
u64 res[2];
int err = 0;
@@ -38,9 +56,14 @@ int live_rc6_manual(void *arg)
__intel_rc6_disable(rc6);
msleep(1); /* wakeup is not immediate, takes about 100us on icl */
- res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[0] = rc6_residency(rc6);
+
+ dt = ktime_get();
+ rc0_power = librapl_energy_uJ();
msleep(250);
- res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ rc0_power = librapl_energy_uJ() - rc0_power;
+ dt = ktime_sub(ktime_get(), dt);
+ res[1] = rc6_residency(rc6);
if ((res[1] - res[0]) >> 10) {
pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
(res[1] - res[0]) >> 10);
@@ -48,13 +71,24 @@ int live_rc6_manual(void *arg)
goto out_unlock;
}
+ rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt));
+ if (!rc0_power) {
+ pr_err("No power measured while in RC0\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Manually enter RC6 */
intel_rc6_park(rc6);
- res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[0] = rc6_residency(rc6);
+ intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
+ dt = ktime_get();
+ rc6_power = librapl_energy_uJ();
msleep(100);
- res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
-
+ rc6_power = librapl_energy_uJ() - rc6_power;
+ dt = ktime_sub(ktime_get(), dt);
+ res[1] = rc6_residency(rc6);
if (res[1] == res[0]) {
pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
@@ -63,6 +97,15 @@ int live_rc6_manual(void *arg)
err = -EINVAL;
}
+ rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, ktime_to_ns(dt));
+ pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+ rc0_power, rc6_power);
+ if (2 * rc6_power > rc0_power) {
+ pr_err("GPU leaked energy while in RC6!\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Restore what should have been the original state! */
intel_rc6_unpark(rc6);
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 6ad6aca315f6..35406ecdf0b2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -115,7 +115,7 @@ static int igt_atomic_engine_reset(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
intel_gt_pm_get(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
new file mode 100644
index 000000000000..3350e7c995bc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static struct i915_vma *create_wally(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ err = i915_vma_sync(vma);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(cs);
+ }
+
+ if (INTEL_GEN(engine->i915) >= 6) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = 0;
+ } else if (INTEL_GEN(engine->i915) >= 4) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = 0;
+ } else {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ }
+ *cs++ = vma->node.start + 4000;
+ *cs++ = STACK_MAGIC;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ vma->private = intel_context_create(engine); /* dummy residuals */
+ if (IS_ERR(vma->private)) {
+ vma = ERR_CAST(vma->private);
+ i915_gem_object_put(obj);
+ }
+
+ return vma;
+}
+
+/*
+ * Submit a request on @ce and wait up to HZ / 5 jiffies for it.
+ *
+ * Returns 0 if the wait succeeds, -ETIME if the wait reports an error, or
+ * the error from request creation.
+ */
+static int context_sync(struct intel_context *ce)
+{
+	struct i915_request *rq = intel_context_create_request(ce);
+	long timeout;
+
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Keep our own reference across the add so we can wait on rq. */
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	timeout = i915_request_wait(rq, 0, HZ / 5);
+	i915_request_put(rq);
+
+	return timeout < 0 ? -ETIME : 0;
+}
+
+/*
+ * Create a fresh context on @engine, run context_sync() on it and drop the
+ * context again.
+ *
+ * Returns the context_sync() result, or the context creation error.
+ */
+static int new_context_sync(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = intel_context_create(engine);
+	int ret;
+
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	ret = context_sync(ce);
+	intel_context_put(ce);
+
+	return ret;
+}
+
+static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result)
+{
+ int pass;
+ int err;
+
+ for (pass = 0; pass < 2; pass++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(engine->kernel_context);
+ if (err || READ_ONCE(*result)) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb emitted for the kernel context\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+
+ WRITE_ONCE(*result, 0);
+ err = new_context_sync(engine);
+ if (READ_ONCE(*result) != STACK_MAGIC) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+
+ WRITE_ONCE(*result, 0);
+ err = new_context_sync(engine);
+ if (READ_ONCE(*result) != STACK_MAGIC) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+ struct intel_context *ce;
+ int err, i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ for (i = 0; i < 2; i++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(ce);
+ if (err)
+ break;
+ }
+ intel_context_put(ce);
+ if (err)
+ return err;
+
+ if (READ_ONCE(*result)) {
+ pr_err("wa_bb emitted between the same user context\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+ struct intel_context *ce;
+ int err, i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ for (i = 0; i < 2; i++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(ce);
+ if (err)
+ break;
+
+ err = context_sync(engine->kernel_context);
+ if (err)
+ break;
+ }
+ intel_context_put(ce);
+ if (err)
+ return err;
+
+ if (READ_ONCE(*result)) {
+ pr_err("wa_bb emitted between the same user context [with intervening kernel]\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __live_ctx_switch_wa(struct intel_engine_cs *engine)
+{
+ struct i915_vma *bb;
+ u32 *result;
+ int err;
+
+ bb = create_wally(engine);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC);
+ if (IS_ERR(result)) {
+ intel_context_put(bb->private);
+ i915_vma_unpin_and_release(&bb, 0);
+ return PTR_ERR(result);
+ }
+ result += 1000;
+
+ engine->wa_ctx.vma = bb;
+
+ err = mixed_contexts_sync(engine, result);
+ if (err)
+ goto out;
+
+ err = double_context_sync_00(engine, result);
+ if (err)
+ goto out;
+
+ err = kernel_context_sync_00(engine, result);
+ if (err)
+ goto out;
+
+out:
+ intel_context_put(engine->wa_ctx.vma->private);
+ i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
+static int live_ctx_switch_wa(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Exercise the inter-context wa batch.
+ *
+ * Between each user context we run a wa batch, and since it may
+ * have implications for user visible state, we have to check that
+ * we do actually execute it.
+ *
+ * The trick we use is to replace the normal wa batch with a custom
+ * one that writes to a marker within it, and we can then look for
+ * that marker to confirm if the batch was run when we expect it,
+ * and equally important that it wasn't run when we don't!
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_vma *saved_wa;
+ int err;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (IS_GEN_RANGE(gt->i915, 4, 5))
+ continue; /* MI_STORE_DWORD is privileged! */
+
+ saved_wa = fetch_and_zero(&engine->wa_ctx.vma);
+
+ intel_engine_pm_get(engine);
+ err = __live_ctx_switch_wa(engine);
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ engine->wa_ctx.vma = saved_wa;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Entry point for the ring-submission live selftests.
+ *
+ * Returns 0 without running anything when the device has execlists;
+ * otherwise returns the result of running the subtests on the device's GT.
+ */
+int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_ctx_switch_wa),
+	};
+
+	if (!HAS_EXECLISTS(i915))
+		return intel_gt_live_subtests(tests, &i915->gt);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
new file mode 100644
index 000000000000..6275d69aa9cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/pm_qos.h>
+#include <linux/sort.h>
+
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt_clock_utils.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "selftest_rps.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/librapl.h"
+
+/* Try to isolate the impact of cstates from determining frequency response */
+#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
+
+/*
+ * Zero @engine's heartbeat interval, take an engine-pm reference and park
+ * the heartbeat.
+ *
+ * Returns the previous heartbeat interval so the caller can hand it back to
+ * engine_heartbeat_enable(), which also drops the engine-pm reference.
+ */
+static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
+{
+ unsigned long old;
+
+ old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+
+ return old;
+}
+
+/*
+ * Undo engine_heartbeat_disable(): drop the engine-pm reference and restore
+ * @engine's heartbeat interval to @saved.
+ */
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
+static void dummy_rps_work(struct work_struct *wrk)
+{
+}
+
+/*
+ * sort() comparator for arrays of u64: ascending order by value.
+ *
+ * @A and @B point at the two elements being compared. The values must be
+ * dereferenced; comparing the element addresses would order by position in
+ * the array and leave the samples unsorted, defeating the median filters
+ * built on top of this.
+ *
+ * Returns -1, 0 or 1 as *A is less than, equal to or greater than *B.
+ */
+static int cmp_u64(const void *A, const void *B)
+{
+	const u64 *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * sort() comparator for arrays of u32: ascending order by value.
+ *
+ * @A and @B point at the two elements being compared. The values must be
+ * dereferenced; comparing the element addresses would order by position in
+ * the array and leave the samples unsorted.
+ *
+ * Returns -1, 0 or 1 as *A is less than, equal to or greater than *B.
+ */
+static int cmp_u32(const void *A, const void *B)
+{
+	const u32 *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static struct i915_vma *
+create_spin_counter(struct intel_engine_cs *engine,
+ struct i915_address_space *vm,
+ bool srm,
+ u32 **cancel,
+ u32 **counter)
+{
+ enum {
+ COUNT,
+ INC,
+ __NGPR__,
+ };
+#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned long end;
+ u32 *base, *cs;
+ int loop, i;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ end = obj->base.size / sizeof(u32) - 1;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ base = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(base)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(base);
+ }
+ cs = base;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
+ for (i = 0; i < __NGPR__; i++) {
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i));
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
+ *cs++ = 1;
+
+ loop = cs - base;
+
+ /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
+ for (i = 0; i < 1024; i++) {
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+ if (srm) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+ }
+ }
+
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+ GEM_BUG_ON(cs - base > end);
+
+ i915_gem_object_flush_map(obj);
+
+ *cancel = base + loop;
+ *counter = srm ? memset32(base + end, 0, 1) : NULL;
+ return vma;
+}
+
+static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
+{
+ u8 history[64], i;
+ unsigned long end;
+ int sleep;
+
+ i = 0;
+ memset(history, freq, sizeof(history));
+ sleep = 20;
+
+ /* The PCU does not change instantly, but drifts towards the goal? */
+ end = jiffies + msecs_to_jiffies(timeout_ms);
+ do {
+ u8 act;
+
+ act = read_cagf(rps);
+ if (time_after(jiffies, end))
+ return act;
+
+ /* Target acquired */
+ if (act == freq)
+ return act;
+
+ /* Any change within the last N samples? */
+ if (!memchr_inv(history, act, sizeof(history)))
+ return act;
+
+ history[i] = act;
+ i = (i + 1) % ARRAY_SIZE(history);
+
+ usleep_range(sleep, 2 * sleep);
+ sleep *= 2;
+ if (sleep > timeout_ms * 20)
+ sleep = timeout_ms * 20;
+ } while (1);
+}
+
+static u8 rps_set_check(struct intel_rps *rps, u8 freq)
+{
+ mutex_lock(&rps->lock);
+ GEM_BUG_ON(!intel_rps_is_active(rps));
+ intel_rps_set(rps, freq);
+ GEM_BUG_ON(rps->last_freq != freq);
+ mutex_unlock(&rps->lock);
+
+ return wait_for_freq(rps, freq, 50);
+}
+
+static void show_pstate_limits(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_BROXTON(i915)) {
+ pr_info("P_STATE_CAP[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(BXT_RP_STATE_CAP),
+ intel_uncore_read(rps_to_uncore(rps),
+ BXT_RP_STATE_CAP));
+ } else if (IS_GEN(i915, 9)) {
+ pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
+ intel_uncore_read(rps_to_uncore(rps),
+ GEN9_RP_STATE_LIMITS));
+ }
+}
+
+int live_rps_clock_interval(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ intel_rps_disable(&gt->rps);
+
+ intel_gt_check_clock_frequency(gt);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ u32 cycles;
+ u64 dt;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
+
+ /* Set the evaluation interval to infinity! */
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_EI, 0xffffffff);
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_THRESHOLD, 0xffffffff);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
+ GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
+
+ if (wait_for(intel_uncore_read_fw(gt->uncore,
+ GEN6_RP_CUR_UP_EI),
+ 10)) {
+ /* Just skip the test; assume lack of HW support */
+ pr_notice("%s: rps evaluation interval not ticking\n",
+ engine->name);
+ err = -ENODEV;
+ } else {
+ ktime_t dt_[5];
+ u32 cycles_[5];
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ preempt_disable();
+
+ dt_[i] = ktime_get();
+ cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ udelay(1000);
+
+ dt_[i] = ktime_sub(ktime_get(), dt_[i]);
+ cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ preempt_enable();
+ }
+
+ /* Use the median of both cycle/dt; close enough */
+ sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
+ cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
+ sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
+ dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
+ }
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ if (err == 0) {
+ u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
+ u32 expected =
+ intel_gt_ns_to_pm_interval(gt, dt);
+
+ pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
+ engine->name, cycles, time, dt, expected,
+ gt->clock_frequency / 1000);
+
+ if (10 * time < 8 * dt ||
+ 8 * time > 10 * dt) {
+ pr_err("%s: rps clock time does not match walltime!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+
+ if (10 * expected < 8 * cycles ||
+ 8 * expected > 10 * cycles) {
+ pr_err("%s: walltime does not match rps clock ticks!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ break; /* once is enough */
+ }
+
+ intel_rps_enable(&gt->rps);
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (err == -ENODEV) /* skipped, don't report a fail */
+ err = 0;
+
+ return err;
+}
+
+/*
+ * Selftest: verify that the frequency we request through RPS is the
+ * frequency the hardware reports while each engine is kept busy by a
+ * spinner.
+ *
+ * @arg is the struct intel_gt under test. Returns 0 on success or when the
+ * test does not apply, otherwise a negative error code.
+ */
+int live_rps_control(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * Check that the actual frequency matches our requested frequency,
+	 * to verify our control mechanism. We have to be careful that the
+	 * PCU may throttle the GPU in which case the actual frequency used
+	 * will be lower than requested.
+	 */
+
+	if (!intel_rps_is_enabled(rps))
+		return 0;
+
+	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	/* Swap in a no-op worker for the duration of the test. */
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	intel_gt_pm_get(gt);
+	for_each_engine(engine, gt, id) {
+		unsigned long saved_heartbeat;
+		struct i915_request *rq;
+		ktime_t min_dt, max_dt;
+		int f, limit;
+		int min, max;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		saved_heartbeat = engine_heartbeat_disable(engine);
+
+		rq = igt_spinner_create_request(&spin,
+						engine->kernel_context,
+						MI_NOOP);
+		if (IS_ERR(rq)) {
+			/*
+			 * Restore the heartbeat and drop the engine-pm
+			 * reference taken by engine_heartbeat_disable(),
+			 * as every other exit from this loop does.
+			 */
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			pr_err("%s: RPS spinner did not start\n",
+			       engine->name);
+			igt_spinner_end(&spin);
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			intel_gt_set_wedged(engine->gt);
+			err = -EIO;
+			break;
+		}
+
+		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
+			       engine->name, rps->min_freq, read_cagf(rps));
+			igt_spinner_end(&spin);
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			show_pstate_limits(rps);
+			err = -EINVAL;
+			break;
+		}
+
+		/* Walk upwards until the hardware stops following us. */
+		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
+			if (rps_set_check(rps, f) < f)
+				break;
+		}
+
+		limit = rps_set_check(rps, f);
+
+		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
+			       engine->name, rps->min_freq, read_cagf(rps));
+			igt_spinner_end(&spin);
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			show_pstate_limits(rps);
+			err = -EINVAL;
+			break;
+		}
+
+		/* Time the min -> limit and limit -> min transitions. */
+		max_dt = ktime_get();
+		max = rps_set_check(rps, limit);
+		max_dt = ktime_sub(ktime_get(), max_dt);
+
+		min_dt = ktime_get();
+		min = rps_set_check(rps, rps->min_freq);
+		min_dt = ktime_sub(ktime_get(), min_dt);
+
+		igt_spinner_end(&spin);
+		engine_heartbeat_enable(engine, saved_heartbeat);
+
+		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
+			engine->name,
+			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
+			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
+			limit, intel_gpu_freq(rps, limit),
+			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
+
+		if (limit == rps->min_freq) {
+			pr_err("%s: GPU throttled to minimum!\n",
+			       engine->name);
+			show_pstate_limits(rps);
+			err = -ENODEV;
+			break;
+		}
+
+		if (igt_flush_test(gt->i915)) {
+			err = -EIO;
+			break;
+		}
+	}
+	intel_gt_pm_put(gt);
+
+	igt_spinner_fini(&spin);
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	return err;
+}
+
+static void show_pcu_config(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq;
+
+ if (!HAS_LLC(i915))
+ return;
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
+
+ pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ int ia_freq = gpu_freq;
+
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
+
+ pr_info("%5d %5d %5d\n",
+ gpu_freq * 50,
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+
+ intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
+}
+
+static u64 __measure_frequency(u32 *cntr, int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = READ_ONCE(*cntr);
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = READ_ONCE(*cntr) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_frequency(cntr, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
+ int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_cs_frequency_at(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_cs_frequency(engine, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+/*
+ * Check that the ratio x:y lies strictly between f_n:f_d and f_d:f_n,
+ * using cross-multiplication rather than division.
+ */
+static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
+{
+	const bool above_lower = f_d * x > f_n * y;
+	const bool below_upper = f_n * x < f_d * y;
+
+	return above_lower && below_upper;
+}
+
+/*
+ * Selftest: run a counting loop on each engine and check that the rate at
+ * which the CS GPR counter advances scales with the RPS frequency we set.
+ *
+ * @arg is the struct intel_gt under test. Returns 0 on success or when the
+ * test does not apply, otherwise a negative error code.
+ */
+int live_rps_frequency_cs(void *arg)
+{
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	struct intel_engine_cs *engine;
+	struct pm_qos_request qos;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * The premise is that the GPU does change frequency at our behest.
+	 * Let's check there is a correspondence between the requested
+	 * frequency, the actual frequency, and the observed clock rate.
+	 */
+
+	if (!intel_rps_is_enabled(rps))
+		return 0;
+
+	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+		return 0;
+
+	if (CPU_LATENCY >= 0)
+		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+	/* Swap in a no-op worker for the duration of the test. */
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	for_each_engine(engine, gt, id) {
+		unsigned long saved_heartbeat;
+		struct i915_request *rq;
+		struct i915_vma *vma;
+		u32 *cancel, *cntr;
+		struct {
+			u64 count;
+			int freq;
+		} min, max;
+
+		saved_heartbeat = engine_heartbeat_disable(engine);
+
+		vma = create_spin_counter(engine,
+					  engine->kernel_context->vm, false,
+					  &cancel, &cntr);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			break;
+		}
+
+		rq = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_vma;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (!err)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		if (!err)
+			err = rq->engine->emit_bb_start(rq,
+							vma->node.start,
+							PAGE_SIZE, 0);
+		i915_vma_unlock(vma);
+		i915_request_add(rq);
+		if (err)
+			goto err_vma;
+
+		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
+			     10)) {
+			pr_err("%s: timed loop did not start\n",
+			       engine->name);
+			/*
+			 * err is still 0 here; without an error code the
+			 * failure would be logged but reported as a pass.
+			 */
+			err = -EIO;
+			goto err_vma;
+		}
+
+		min.freq = rps->min_freq;
+		min.count = measure_cs_frequency_at(rps, engine, &min.freq);
+
+		max.freq = rps->max_freq;
+		max.count = measure_cs_frequency_at(rps, engine, &max.freq);
+
+		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+			engine->name,
+			min.count, intel_gpu_freq(rps, min.freq),
+			max.count, intel_gpu_freq(rps, max.freq),
+			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+						     max.freq * min.count));
+
+		if (!scaled_within(max.freq * min.count,
+				   min.freq * max.count,
+				   2, 3)) {
+			int f;
+
+			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+			       engine->name,
+			       max.freq * min.count,
+			       min.freq * max.count);
+			show_pcu_config(rps);
+
+			/* Dump the per-frequency response to aid diagnosis. */
+			for (f = min.freq + 1; f <= rps->max_freq; f++) {
+				int act = f;
+				u64 count;
+
+				count = measure_cs_frequency_at(rps, engine, &act);
+				if (act < f)
+					break;
+
+				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+					engine->name,
+					act, intel_gpu_freq(rps, act), count,
+					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+								     act * min.count));
+
+				f = act; /* may skip ahead [pcu granularity] */
+			}
+
+			err = -EINVAL;
+		}
+
+err_vma:
+		/* Terminate the counting loop before releasing the batch. */
+		*cancel = MI_BATCH_BUFFER_END;
+		i915_gem_object_flush_map(vma->obj);
+		i915_gem_object_unpin_map(vma->obj);
+		i915_vma_unpin(vma);
+		i915_vma_put(vma);
+
+		engine_heartbeat_enable(engine, saved_heartbeat);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	if (CPU_LATENCY >= 0)
+		cpu_latency_qos_remove_request(&qos);
+
+	return err;
+}
+
+/*
+ * live_rps_frequency_srm - check that the counter written to memory follows
+ * the RPS frequency.
+ * @arg: the struct intel_gt under test
+ *
+ * Same flow as live_rps_frequency_cs(), but create_spin_counter() is asked
+ * for the variant whose counter is read through @cntr (measured with
+ * measure_frequency_at()), and the accepted tolerance is 1:2.
+ *
+ * Returns 0 on success or when the test is skipped (RPS disabled, or before
+ * gen8), a negative error code otherwise.
+ */
+int live_rps_frequency_srm(void *arg)
+{
+	void (*saved_work)(struct work_struct *wrk);
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	struct intel_engine_cs *engine;
+	struct pm_qos_request qos;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * The premise is that the GPU does change frequency at our behest.
+	 * Let's check there is a correspondence between the requested
+	 * frequency, the actual frequency, and the observed clock rate.
+	 */
+
+	if (!intel_rps_is_enabled(rps))
+		return 0;
+
+	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+		return 0;
+
+	if (CPU_LATENCY >= 0)
+		cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+	/* Swap in dummy_rps_work for the RPS work handler while we test */
+	intel_gt_pm_wait_for_idle(gt);
+	saved_work = rps->work.func;
+	rps->work.func = dummy_rps_work;
+
+	for_each_engine(engine, gt, id) {
+		unsigned long saved_heartbeat;
+		struct i915_request *rq;
+		struct i915_vma *vma;
+		u32 *cancel, *cntr;
+		struct {
+			u64 count;
+			int freq;
+		} min, max;
+
+		saved_heartbeat = engine_heartbeat_disable(engine);
+
+		vma = create_spin_counter(engine,
+					  engine->kernel_context->vm, true,
+					  &cancel, &cntr);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			engine_heartbeat_enable(engine, saved_heartbeat);
+			break;
+		}
+
+		rq = intel_engine_create_kernel_request(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_vma;
+		}
+
+		i915_vma_lock(vma);
+		err = i915_request_await_object(rq, vma->obj, false);
+		if (!err)
+			err = i915_vma_move_to_active(vma, rq, 0);
+		if (!err)
+			err = rq->engine->emit_bb_start(rq,
+							vma->node.start,
+							PAGE_SIZE, 0);
+		i915_vma_unlock(vma);
+		/* The request is always submitted, even after an error */
+		i915_request_add(rq);
+		if (err)
+			goto err_vma;
+
+		if (wait_for(READ_ONCE(*cntr), 10)) {
+			pr_err("%s: timed loop did not start\n",
+			       engine->name);
+			/* Fail the test instead of only logging the problem */
+			err = -ETIMEDOUT;
+			goto err_vma;
+		}
+
+		min.freq = rps->min_freq;
+		min.count = measure_frequency_at(rps, cntr, &min.freq);
+
+		max.freq = rps->max_freq;
+		max.count = measure_frequency_at(rps, cntr, &max.freq);
+
+		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+			engine->name,
+			min.count, intel_gpu_freq(rps, min.freq),
+			max.count, intel_gpu_freq(rps, max.freq),
+			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+						     max.freq * min.count));
+
+		if (!scaled_within(max.freq * min.count,
+				   min.freq * max.count,
+				   1, 2)) {
+			int f;
+
+			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+			       engine->name,
+			       max.freq * min.count,
+			       min.freq * max.count);
+			show_pcu_config(rps);
+
+			/* Log the rate at every step to help diagnose the failure */
+			for (f = min.freq + 1; f <= rps->max_freq; f++) {
+				int act = f;
+				u64 count;
+
+				count = measure_frequency_at(rps, cntr, &act);
+				if (act < f)
+					break;
+
+				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+					engine->name,
+					act, intel_gpu_freq(rps, act), count,
+					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+								     act * min.count));
+
+				f = act; /* may skip ahead [pcu granularity] */
+			}
+
+			err = -EINVAL;
+		}
+
+err_vma:
+		/* Terminate the spinning batch before releasing it */
+		*cancel = MI_BATCH_BUFFER_END;
+		i915_gem_object_flush_map(vma->obj);
+		i915_gem_object_unpin_map(vma->obj);
+		i915_vma_unpin(vma);
+		i915_vma_put(vma);
+
+		engine_heartbeat_enable(engine, saved_heartbeat);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	intel_gt_pm_wait_for_idle(gt);
+	rps->work.func = saved_work;
+
+	if (CPU_LATENCY >= 0)
+		cpu_latency_qos_remove_request(&qos);
+
+	return err;
+}
+
+/*
+ * Sleep across one evaluation interval (EI) of @timeout_us microseconds with
+ * a freshly reset RPS interrupt status, so that whatever is recorded in
+ * rps->pm_iir afterwards was raised during the final sleep.
+ */
+static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
+{
+ /* Flush any previous EI */
+ usleep_range(timeout_us, 2 * timeout_us);
+
+ /* Reset the interrupt status */
+ rps_disable_interrupts(rps);
+ GEM_BUG_ON(rps->pm_iir);
+ rps_enable_interrupts(rps);
+
+ /* And then wait for the timeout, for real this time */
+ usleep_range(2 * timeout_us, 3 * timeout_us);
+}
+
+/*
+ * Keep @engine busy with a spinner at the minimum frequency for one UP
+ * evaluation interval and check that an UP threshold interrupt is recorded
+ * in rps->pm_iir while the software frequency stays untouched.
+ *
+ * Returns 0 on success, or if the engine cannot store dwords (nothing is
+ * tested); -EIO if the spinner never started (the GT is wedged); -EINVAL
+ * if any of the RPS expectations does not hold; or the error from creating
+ * the spinner request.
+ */
+static int __rps_up_interrupt(struct intel_rps *rps,
+			      struct intel_engine_cs *engine,
+			      struct igt_spinner *spin)
+{
+	struct intel_uncore *uncore = engine->uncore;
+	struct i915_request *rq;
+	u32 timeout;
+
+	if (!intel_engine_can_store_dword(engine))
+		return 0;
+
+	rps_set_check(rps, rps->min_freq);
+
+	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (!igt_wait_for_spinner(spin, rq)) {
+		pr_err("%s: RPS spinner did not start\n",
+		       engine->name);
+		i915_request_put(rq);
+		intel_gt_set_wedged(engine->gt);
+		return -EIO;
+	}
+
+	/*
+	 * From here on the spinner is running: every exit path must end it,
+	 * otherwise the caller is left with a busy engine to flush.
+	 */
+
+	if (!intel_rps_is_active(rps)) {
+		pr_err("%s: RPS not enabled on starting spinner\n",
+		       engine->name);
+		igt_spinner_end(spin);
+		i915_request_put(rq);
+		return -EINVAL;
+	}
+
+	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
+		pr_err("%s: RPS did not register UP interrupt\n",
+		       engine->name);
+		igt_spinner_end(spin);
+		i915_request_put(rq);
+		return -EINVAL;
+	}
+
+	if (rps->last_freq != rps->min_freq) {
+		pr_err("%s: RPS did not program min frequency\n",
+		       engine->name);
+		igt_spinner_end(spin);
+		i915_request_put(rq);
+		return -EINVAL;
+	}
+
+	/* Convert the UP evaluation interval from HW units to microseconds */
+	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+	timeout = DIV_ROUND_UP(timeout, 1000);
+
+	sleep_for_ei(rps, timeout);
+	GEM_BUG_ON(i915_request_completed(rq));
+
+	igt_spinner_end(spin);
+	i915_request_put(rq);
+
+	if (rps->cur_freq != rps->min_freq) {
+		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
+		       engine->name, intel_rps_read_actual_frequency(rps));
+		return -EINVAL;
+	}
+
+	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
+		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
+		       engine->name, rps->pm_iir,
+		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * With the frequency set to maximum and no work submitted by this function,
+ * wait for one DOWN evaluation interval and check that a DOWN threshold or
+ * DOWN timeout interrupt is recorded in rps->pm_iir while the software
+ * frequency stays untouched.
+ *
+ * Returns 0 on success, -EINVAL if any expectation does not hold.
+ */
+static int __rps_down_interrupt(struct intel_rps *rps,
+ struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ u32 timeout;
+
+ rps_set_check(rps, rps->max_freq);
+
+ if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
+ pr_err("%s: RPS did not register DOWN interrupt\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ if (rps->last_freq != rps->max_freq) {
+ pr_err("%s: RPS did not program max frequency\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ /* Convert the DOWN evaluation interval from HW units to microseconds */
+ timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+ timeout = DIV_ROUND_UP(timeout, 1000);
+
+ sleep_for_ei(rps, timeout);
+
+ if (rps->cur_freq != rps->max_freq) {
+ pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
+ engine->name,
+ intel_rps_read_actual_frequency(rps));
+ return -EINVAL;
+ }
+
+ /* Either flavour of DOWN interrupt is accepted */
+ if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
+ pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
+ engine->name, rps->pm_iir,
+ intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
+ intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+ intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_UP_EI));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * live_rps_interrupt - check that RPS UP and DOWN interrupts are received.
+ * @arg: the struct intel_gt under test
+ *
+ * For each engine, and for each kind of event present in rps->pm_events,
+ * run __rps_up_interrupt() and/or __rps_down_interrupt() with the engine
+ * heartbeat disabled and the RPS work handler replaced by dummy_rps_work.
+ *
+ * Returns 0 on success or when RPS has no interrupts, -ENODEV if no PM
+ * events are registered, -ENOMEM if the spinner cannot be set up, -EIO if
+ * the final flush fails, or the error of the failing sub-test.
+ */
+int live_rps_interrupt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ u32 pm_events;
+ int err = 0;
+
+ /*
+ * First, let's check whether or not we are receiving interrupts.
+ */
+
+ if (!intel_rps_has_interrupts(rps))
+ return 0;
+
+ /* Sample the registered events while holding a GT wakeref */
+ intel_gt_pm_get(gt);
+ pm_events = rps->pm_events;
+ intel_gt_pm_put(gt);
+ if (!pm_events) {
+ pr_err("No RPS PM events registered, but RPS is enabled?\n");
+ return -ENODEV;
+ }
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ /* Swap in dummy_rps_work for the RPS work handler while we test */
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ /* Keep the engine busy with a spinner; expect an UP! */
+ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ err = __rps_up_interrupt(rps, engine, &spin);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ }
+
+ /* Keep the engine awake but idle and check for DOWN */
+ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+ intel_rc6_disable(&gt->rc6);
+
+ err = __rps_down_interrupt(rps, engine);
+
+ intel_rc6_enable(&gt->rc6);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ /* A failed flush overrides whatever result the sub-tests produced */
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+/*
+ * Sample the energy counter around a sleep of at least @duration_ms and
+ * return the energy used (uJ, per librapl_energy_uJ()) scaled by 10^6 and
+ * divided by the elapsed ktime; live_rps_power() prints the result as mW.
+ */
+static u64 __measure_power(int duration_ms)
+{
+	u64 time_start, elapsed;
+	u64 energy_start, energy_used;
+
+	time_start = ktime_get();
+	energy_start = librapl_energy_uJ();
+
+	usleep_range(1000 * duration_ms, 2000 * duration_ms);
+
+	energy_used = librapl_energy_uJ() - energy_start;
+	elapsed = ktime_get() - time_start;
+
+	return div64_u64(1000 * 1000 * energy_used, elapsed);
+}
+
+/*
+ * Request the frequency *@freq, take five power samples and return a
+ * filtered value. On return *@freq holds the mean of the value returned by
+ * rps_set_check() and the value read back with read_cagf() after sampling.
+ */
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+	u64 samples[5];
+	int n = 0;
+
+	*freq = rps_set_check(rps, *freq);
+	while (n < 5) {
+		samples[n] = __measure_power(5);
+		n++;
+	}
+	*freq = (*freq + read_cagf(rps)) / 2;
+
+	/*
+	 * Triangle filter: after sorting, drop both extremes and give the
+	 * median twice the weight of its neighbours.
+	 */
+	sort(samples, 5, sizeof(samples[0]), cmp_u64, NULL);
+	return div_u64(samples[1] + 2 * samples[2] + samples[3], 4);
+}
+
+/*
+ * live_rps_power - check that a lower frequency consumes less power.
+ * @arg: the struct intel_gt under test
+ *
+ * For each engine able to store dwords, keep it busy with a spinner and
+ * measure power with measure_power_at() at the maximum and then the minimum
+ * RPS frequency. The test fails with -EINVAL if the minimum-frequency power
+ * exceeds 10/11 of the maximum-frequency power.
+ *
+ * Returns 0 on success or when skipped (RPS disabled, or no energy reading
+ * from librapl_energy_uJ()), a negative error code otherwise.
+ */
+int live_rps_power(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Our fundamental assumption is that running at lower frequency
+ * actually saves power. Let's see if our RAPL measurement support
+ * that theory.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ /* A zero reading is treated as "no energy counter available" */
+ if (!librapl_energy_uJ())
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ /* Swap in dummy_rps_work for the RPS work handler while we test */
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct {
+ u64 power;
+ int freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ /* measure_power_at() updates .freq to the frequency it observed */
+ max.freq = rps->max_freq;
+ max.power = measure_power_at(rps, &max.freq);
+
+ min.freq = rps->min_freq;
+ min.power = measure_power_at(rps, &min.freq);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
+ engine->name,
+ min.power, intel_gpu_freq(rps, min.freq),
+ max.power, intel_gpu_freq(rps, max.freq));
+
+ /*
+ * If the two observed frequencies are within 10% of each other
+ * the comparison is meaningless; note it and skip this engine
+ * (this also skips the igt_flush_test() below).
+ */
+ if (10 * min.freq >= 9 * max.freq) {
+ pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
+ min.freq, intel_gpu_freq(rps, min.freq),
+ max.freq, intel_gpu_freq(rps, max.freq));
+ continue;
+ }
+
+ /* Fail when min.power is more than 10/11 of max.power */
+ if (11 * min.power > 10 * max.power) {
+ pr_err("%s: did not conserve power when setting lower frequency!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+/*
+ * live_rps_dynamic - check dynamic reclocking in response to load.
+ * @arg: the struct intel_gt under test
+ *
+ * For each engine able to store dwords: start from idle at the minimum
+ * frequency, submit a spinner and record the frequency reported by
+ * wait_for_freq() while busy, then end the spinner and record the frequency
+ * reported while idle. The test fails with -EINVAL unless the busy frequency
+ * is strictly higher than the idle one.
+ *
+ * Returns 0 on success or when RPS is disabled, -ENOMEM if the spinner
+ * cannot be set up, -EIO if flushing fails, or another negative error code.
+ */
+int live_rps_dynamic(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_rps *rps = &gt->rps;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * We've looked at the basics, and have established that we
+	 * can change the clock frequency and that the HW will generate
+	 * interrupts based on load. Now we check how we integrate those
+	 * moving parts into dynamic reclocking based on load.
+	 */
+
+	if (!intel_rps_is_enabled(rps))
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq;
+		struct {
+			ktime_t dt;
+			u8 freq;
+		} min, max;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		intel_gt_pm_wait_for_idle(gt);
+		GEM_BUG_ON(intel_rps_is_active(rps));
+		rps->cur_freq = rps->min_freq;
+
+		/* Hold the engine awake, with RC6 off, across both phases */
+		intel_engine_pm_get(engine);
+		intel_rc6_disable(&gt->rc6);
+		GEM_BUG_ON(rps->last_freq != rps->min_freq);
+
+		rq = igt_spinner_create_request(&spin,
+						engine->kernel_context,
+						MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err;
+		}
+
+		i915_request_add(rq);
+
+		/* Busy phase: time how long it takes to head for max_freq */
+		max.dt = ktime_get();
+		max.freq = wait_for_freq(rps, rps->max_freq, 500);
+		max.dt = ktime_sub(ktime_get(), max.dt);
+
+		igt_spinner_end(&spin);
+
+		/* Idle phase: time how long it takes to fall back to min_freq */
+		min.dt = ktime_get();
+		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
+		min.dt = ktime_sub(ktime_get(), min.dt);
+
+		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
+			engine->name,
+			max.freq, intel_gpu_freq(rps, max.freq),
+			ktime_to_ns(max.dt),
+			min.freq, intel_gpu_freq(rps, min.freq),
+			ktime_to_ns(min.dt));
+		if (min.freq >= max.freq) {
+			/* Keep the '!' before the newline so it stays on this log line */
+			pr_err("%s: dynamic reclocking of spinner failed!\n",
+			       engine->name);
+			err = -EINVAL;
+		}
+
+err:
+		intel_rc6_enable(&gt->rc6);
+		intel_engine_pm_put(engine);
+
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	igt_spinner_fini(&spin);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
new file mode 100644
index 000000000000..6e82a631cfa1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SELFTEST_RPS_H
+#define SELFTEST_RPS_H
+
+/*
+ * Entry points of the RPS live selftests. Each returns 0 on success (or
+ * skip) and a negative error code on failure.
+ * NOTE(review): the tests defined in selftest_rps.c that are visible in this
+ * patch take the struct intel_gt under test as @arg; confirm the same holds
+ * for live_rps_control() and live_rps_clock_interval().
+ */
+int live_rps_control(void *arg);
+int live_rps_clock_interval(void *arg);
+int live_rps_frequency_cs(void *arg);
+int live_rps_frequency_srm(void *arg);
+int live_rps_power(void *arg);
+int live_rps_interrupt(void *arg);
+int live_rps_dynamic(void *arg);
+
+#endif /* SELFTEST_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
new file mode 100644
index 000000000000..43c7acbdc79d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/shmem_fs.h>
+
+#include "gem/i915_gem_object.h"
+#include "shmem_utils.h"
+
+/*
+ * Create a shmemfs file called @name, sized to @len rounded up to a whole
+ * number of pages, and copy @len bytes from @data into it at offset 0.
+ *
+ * Returns the new file, or an ERR_PTR() if the file could not be created
+ * or filled.
+ */
+struct file *shmem_create_from_data(const char *name, void *data, size_t len)
+{
+	struct file *filp;
+	int ret;
+
+	filp = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE);
+	if (IS_ERR(filp))
+		return filp;
+
+	ret = shmem_write(filp, 0, data, len);
+	if (!ret)
+		return filp;
+
+	/* The copy failed: release the file we just created */
+	fput(filp);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Return a shmemfs file holding the contents of @obj.
+ *
+ * If @obj already uses i915_gem_shmem_ops, its own backing file is returned
+ * with an extra reference. Otherwise the object is mapped (I915_MAP_WB) and
+ * its contents are copied into a new, unnamed shmemfs file.
+ *
+ * Returns the file, which the caller releases with fput(), or an ERR_PTR().
+ */
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
+{
+	struct file *file;
+	void *ptr;
+
+	if (obj->ops == &i915_gem_shmem_ops) {
+		/* Use the file refcount helper rather than poking f_count */
+		return get_file(obj->base.filp);
+	}
+
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(ptr))
+		return ERR_CAST(ptr);
+
+	file = shmem_create_from_data("", ptr, obj->base.size);
+	i915_gem_object_unpin_map(obj);
+
+	return file;
+}
+
+/* Number of whole pages covered by the inode size of @file */
+static size_t shmem_npte(struct file *file)
+{
+	const struct inode *inode = file->f_mapping->host;
+
+	return inode->i_size >> PAGE_SHIFT;
+}
+
+/*
+ * Undo shmem_pin_map() for the first @n_pte pages of @file: remove the
+ * kernel mapping at @ptr, then drop the page references.
+ */
+static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
+{
+ unsigned long pfn;
+
+ vunmap(ptr);
+
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ /*
+ * Two puts per page: presumably one for the reference taken by
+ * the lookup just above and one for the reference that
+ * shmem_pin_map() kept when it looked the page up (it never
+ * calls put_page()) -- TODO confirm.
+ */
+ if (!WARN_ON(IS_ERR(page))) {
+ put_page(page);
+ put_page(page);
+ }
+ }
+}
+
+/*
+ * Map every page of @file into one contiguous range of kernel virtual
+ * addresses and mark the file's mapping unevictable.
+ *
+ * Returns the address of the mapping, or NULL on failure. Release with
+ * shmem_unpin_map().
+ */
+void *shmem_pin_map(struct file *file)
+{
+ const size_t n_pte = shmem_npte(file);
+ pte_t *stack[32], **ptes, **mem;
+ struct vm_struct *area;
+ unsigned long pfn;
+
+ /* Up to 32 PTE pointers fit in the on-stack array; otherwise allocate */
+ mem = stack;
+ if (n_pte > ARRAY_SIZE(stack)) {
+ mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return NULL;
+ }
+
+ /* mem[] is handed to alloc_vm_area(); its entries are written through below */
+ area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
+ if (!area) {
+ if (mem != stack)
+ kvfree(mem);
+ return NULL;
+ }
+
+ ptes = mem;
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ goto err_page;
+
+ /* The page is not put here; __shmem_unpin_map() does that later */
+ **ptes++ = mk_pte(page, PAGE_KERNEL);
+ }
+
+ if (mem != stack)
+ kvfree(mem);
+
+ mapping_set_unevictable(file->f_mapping);
+ return area->addr;
+
+err_page:
+ if (mem != stack)
+ kvfree(mem);
+
+ /* Only the first pfn pages were set up before the failure */
+ __shmem_unpin_map(file, area->addr, pfn);
+ return NULL;
+}
+
+/*
+ * Release a mapping returned by shmem_pin_map(): clear the unevictable
+ * marking on the file's mapping, then unmap @ptr and drop the references on
+ * every page of @file.
+ */
+void shmem_unpin_map(struct file *file, void *ptr)
+{
+ mapping_clear_unevictable(file->f_mapping);
+ __shmem_unpin_map(file, ptr, shmem_npte(file));
+}
+
+/*
+ * Copy @len bytes between @ptr and @file, starting at byte offset @off in
+ * the file, one page at a time.
+ *
+ * @write selects the direction: true copies from @ptr into the file, false
+ * copies from the file into @ptr.
+ *
+ * Returns 0 on success or the error from looking up a page. Pages copied
+ * before the failure are not rolled back.
+ */
+static int __shmem_rw(struct file *file, loff_t off,
+		      void *ptr, size_t len,
+		      bool write)
+{
+	unsigned long pfn;
+
+	for (pfn = off >> PAGE_SHIFT; len; pfn++) {
+		/* Never cross a page boundary within a single copy */
+		unsigned int this =
+			min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
+		struct page *page;
+		void *vaddr;
+
+		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+						   GFP_KERNEL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		vaddr = kmap(page);
+		if (write) {
+			memcpy(vaddr + offset_in_page(off), ptr, this);
+			/* Without this the new contents may be lost on reclaim */
+			set_page_dirty(page);
+		} else {
+			memcpy(ptr, vaddr + offset_in_page(off), this);
+		}
+		kunmap(page);
+		put_page(page);
+
+		len -= this;
+		ptr += this;
+		off = 0; /* every page after the first starts at its beginning */
+	}
+
+	return 0;
+}
+
+/*
+ * Copy @len bytes from @file, starting at byte offset @off, into @dst.
+ * Returns 0 on success or a negative error code.
+ */
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
+{
+ return __shmem_rw(file, off, dst, len, false);
+}
+
+/*
+ * Copy @len bytes from @src into @file, starting at byte offset @off.
+ * Returns 0 on success or a negative error code.
+ */
+int shmem_write(struct file *file, loff_t off, void *src, size_t len)
+{
+ return __shmem_rw(file, off, src, len, true);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "st_shmem_utils.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
new file mode 100644
index 000000000000..c1669170c351
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SHMEM_UTILS_H
+#define SHMEM_UTILS_H
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct file;
+
+/*
+ * Create a shmemfs file holding a copy of @data (or of the contents of
+ * @obj). Both return the file or an ERR_PTR().
+ */
+struct file *shmem_create_from_data(const char *name, void *data, size_t len);
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
+
+/*
+ * Map the whole file into kernel address space; shmem_pin_map() returns
+ * NULL on failure. Pass the returned pointer to shmem_unpin_map().
+ */
+void *shmem_pin_map(struct file *file);
+void shmem_unpin_map(struct file *file, void *ptr);
+
+/* Copy @len bytes at file offset @off; return 0 or a negative error code */
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
+int shmem_write(struct file *file, loff_t off, void *src, size_t len);
+
+#endif /* SHMEM_UTILS_H */
diff --git a/drivers/gpu/drm/i915/gt/st_shmem_utils.c b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
new file mode 100644
index 000000000000..b279fe88b70e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/* Just a quick and casual check of the shmem_utils API */
+
+/*
+ * Round-trip a single u32 through the shmem_utils API: create a file from
+ * it, read it back, overwrite it, and check the new value through a pinned
+ * mapping. Returns 0 on success or a negative error code.
+ */
+static int igt_shmem_basic(void *ignored)
+{
+	u32 datum = 0xdeadbeef;
+	u32 result = 0;
+	struct file *file;
+	u32 *map;
+	int err;
+
+	file = shmem_create_from_data("mock", &datum, sizeof(datum));
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	/* The value supplied at creation must be read back unchanged */
+	err = shmem_read(file, 0, &result, sizeof(result));
+	if (err)
+		goto out_file;
+
+	if (result != datum) {
+		pr_err("Incorrect read back from shmemfs: %x != %x\n",
+		       result, datum);
+		err = -EINVAL;
+		goto out_file;
+	}
+
+	/* A later write must be visible through the pinned mapping */
+	result = 0xc0ffee;
+	err = shmem_write(file, 0, &result, sizeof(result));
+	if (err)
+		goto out_file;
+
+	map = shmem_pin_map(file);
+	if (!map) {
+		err = -ENOMEM;
+		goto out_file;
+	}
+
+	if (*map != result) {
+		pr_err("Incorrect read back via mmap of last write: %x != %x\n",
+		       *map, result);
+		err = -EINVAL;
+	}
+
+	shmem_unpin_map(file, map);
+out_file:
+	fput(file);
+	return err;
+}
+
+/* Run the shmem_utils selftests; no data pointer is passed to the subtests */
+int shmem_utils_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shmem_basic),
+ };
+
+ return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
new file mode 100644
index 000000000000..535cc1169e54
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
+#include "sysfs_engines.h"
+
+/* A kobject paired with the engine it stands for; see kobj_to_engine() */
+struct kobj_engine {
+ struct kobject base;
+ struct intel_engine_cs *engine;
+};
+
+/* Recover the engine from the kobject embedded in its struct kobj_engine */
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+	struct kobj_engine *ke = container_of(kobj, struct kobj_engine, base);
+
+	return ke->engine;
+}
+
+/* sysfs "name" (read-only): the engine's name followed by a newline */
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%s\n", engine->name);
+}
+
+static struct kobj_attribute name_attr =
+__ATTR(name, 0444, name_show, NULL);
+
+/* sysfs "class" (read-only): the engine's uabi_class in decimal */
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%d\n", engine->uabi_class);
+}
+
+static struct kobj_attribute class_attr =
+__ATTR(class, 0444, class_show, NULL);
+
+/* sysfs "instance" (read-only): the engine's uabi_instance in decimal */
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%d\n", engine->uabi_instance);
+}
+
+static struct kobj_attribute inst_attr =
+__ATTR(instance, 0444, inst_show, NULL);
+
+/* sysfs "mmio_base" (read-only): the engine's mmio_base in hexadecimal */
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "0x%x\n", engine->mmio_base);
+}
+
+static struct kobj_attribute mmio_attr =
+__ATTR(mmio_base, 0444, mmio_show, NULL);
+
+/*
+ * Capability names, indexed by the bit number (ilog2) of the capability
+ * flag. Used by __caps_show() for VIDEO_DECODE_CLASS engines.
+ */
+static const char * const vcs_caps[] = {
+ [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc",
+ [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+/* As vcs_caps, for VIDEO_ENHANCEMENT_CLASS engines; unnamed slots are NULL */
+static const char * const vecs_caps[] = {
+ [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+/*
+ * Finish a space-separated list of @len bytes in @buf: clamp the length to
+ * PAGE_SIZE and turn the last character (the trailing space) into a
+ * newline. Returns the clamped length.
+ */
+static ssize_t repr_trim(char *buf, ssize_t len)
+{
+	len = len > PAGE_SIZE ? PAGE_SIZE : len;
+	if (len <= 0)
+		return len;
+
+	buf[len - 1] = '\n';
+	return len;
+}
+
+/*
+ * Format the capability bits set in @caps as a space-separated list of
+ * names into @buf, using the name table of the engine's class.
+ *
+ * With @show_unknown, all 32 bits are scanned and any set bit that has no
+ * name is printed as "[bit]" (and triggers GEM_WARN_ON). Without it, only
+ * bits covered by the name table are scanned and unnamed ones are omitted.
+ *
+ * Returns the number of bytes written, at most PAGE_SIZE.
+ */
+static ssize_t
+__caps_show(struct intel_engine_cs *engine,
+	    u32 caps, char *buf, bool show_unknown)
+{
+	const char * const *repr;
+	/*
+	 * The bitmap helpers operate on unsigned long words. Widen the mask
+	 * rather than casting &caps, which would read beyond the u32 on
+	 * 64-bit and look at the wrong half of the word on big-endian.
+	 */
+	unsigned long mask = caps;
+	int count, n;
+	ssize_t len;
+
+	BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities));
+
+	switch (engine->class) {
+	case VIDEO_DECODE_CLASS:
+		repr = vcs_caps;
+		count = ARRAY_SIZE(vcs_caps);
+		break;
+
+	case VIDEO_ENHANCEMENT_CLASS:
+		repr = vecs_caps;
+		count = ARRAY_SIZE(vecs_caps);
+		break;
+
+	default:
+		repr = NULL;
+		count = 0;
+		break;
+	}
+	GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+
+	len = 0;
+	for_each_set_bit(n, &mask,
+			 show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+		if (n >= count || !repr[n]) {
+			if (GEM_WARN_ON(show_unknown))
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"[%x] ", n);
+		} else {
+			len += snprintf(buf + len, PAGE_SIZE - len,
+					"%s ", repr[n]);
+		}
+		/* snprintf() reports the untruncated length: stop once full */
+		if (GEM_WARN_ON(len >= PAGE_SIZE))
+			break;
+	}
+	return repr_trim(buf, len);
+}
+
+/*
+ * sysfs "capabilities" (read-only): names of the capabilities set in the
+ * engine's uabi_capabilities, including any bit without a known name.
+ */
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	const u32 caps = engine->uabi_capabilities;
+
+	return __caps_show(engine, caps, buf, true);
+}
+
+static struct kobj_attribute caps_attr =
+__ATTR(capabilities, 0444, caps_show, NULL);
+
+/*
+ * sysfs "known_capabilities" (read-only): every capability name known for
+ * the engine's class, obtained by asking for all bits without unknowns.
+ */
+static ssize_t
+all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return __caps_show(engine, -1, buf, false);
+}
+
+static struct kobj_attribute all_caps_attr =
+__ATTR(known_capabilities, 0444, all_caps_show, NULL);
+
+/*
+ * sysfs store for "max_busywait_duration_ns": parse an unsigned integer
+ * (any base accepted by kstrtoull) and, if it does not exceed two jiffies
+ * worth of nanoseconds, save it in engine->props.
+ *
+ * Returns @count on success, the kstrtoull() error for unparsable input,
+ * or -EINVAL if the value is too large.
+ */
+static ssize_t
+max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long ns;
+	int ret;
+
+	/*
+	 * When waiting for a request that is currently executing on the
+	 * GPU, we busywait for a short while before sleeping. The premise
+	 * is that most requests are short, so one that is already running
+	 * has a good chance of completing before we could set up the
+	 * interrupt handler and go to sleep. Spinning first offsets the cost
+	 * of sleeping: if the request completes in less time than it would
+	 * take to sleep, process the interrupt and return to the client,
+	 * we have saved the client some latency, albeit at the cost of
+	 * spinning on an expensive CPU core.
+	 *
+	 * While we try to avoid waiting at all for a request that is
+	 * unlikely to complete, how long it is worth spinning for is an
+	 * arbitrary decision: trading off power vs latency.
+	 */
+
+	ret = kstrtoull(buf, 0, &ns);
+	if (ret)
+		return ret;
+
+	if (ns > jiffies_to_nsecs(2))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.max_busywait_duration_ns, ns);
+
+	return count;
+}
+
+/* sysfs show for "max_busywait_duration_ns": the current engine->props value */
+static ssize_t
+max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_attr =
+__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
+
+/* Read-only "max_busywait_duration_ns" reporting the engine->defaults value */
+static ssize_t
+max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->defaults.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_def =
+__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
+
+/*
+ * sysfs store for "timeslice_duration_ms": parse an unsigned integer and,
+ * if it does not exceed jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT), save it in
+ * engine->props. If execlists is active at that moment, its timer is
+ * re-armed with the new value.
+ *
+ * Returns @count on success, the kstrtoull() error for unparsable input,
+ * or -EINVAL if the value is too large.
+ */
+static ssize_t
+timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long ms;
+	int ret;
+
+	/*
+	 * Execlists uses a scheduling quantum (a timeslice) to alternate
+	 * execution between ready-to-run contexts of equal priority. This
+	 * ensures that all users (though only if they are of equal
+	 * importance) have the opportunity to run and prevents livelocks
+	 * where contexts may have implicit ordering due to userspace
+	 * semaphores.
+	 */
+
+	ret = kstrtoull(buf, 0, &ms);
+	if (ret)
+		return ret;
+
+	if (ms > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.timeslice_duration_ms, ms);
+
+	if (execlists_active(&engine->execlists))
+		set_timer_ms(&engine->execlists.timer, ms);
+
+	return count;
+}
+
+/* sysfs show for "timeslice_duration_ms": the current engine->props value */
+static ssize_t
+timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
+
+/* Read-only "timeslice_duration_ms" reporting the engine->defaults value */
+static ssize_t
+timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->defaults.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
+
+/*
+ * sysfs store for "stop_timeout_ms": parse an unsigned integer and, if it
+ * does not exceed jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT), save it in
+ * engine->props.
+ *
+ * Returns @count on success, the kstrtoull() error for unparsable input,
+ * or -EINVAL if the value is too large.
+ */
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+	   const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long ms;
+	int ret;
+
+	/*
+	 * Allowing ourselves to sleep before a GPU reset after disabling
+	 * submission, even for a few milliseconds, gives an innocent context
+	 * the opportunity to clear the GPU before the reset occurs. However,
+	 * how long to sleep depends on the typical non-preemptible duration
+	 * (a similar problem to determining the ideal preempt-reset timeout
+	 * or even the heartbeat interval).
+	 */
+
+	ret = kstrtoull(buf, 0, &ms);
+	if (ret)
+		return ret;
+
+	if (ms > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.stop_timeout_ms, ms);
+	return count;
+}
+
+/* sysfs show for "stop_timeout_ms": the current engine->props value */
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
+/* Read-only "stop_timeout_ms" reporting the engine->defaults value */
+static ssize_t
+stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->defaults.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_def =
+__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
+
+/*
+ * sysfs store for "preempt_timeout_ms": parse an unsigned integer and, if
+ * it does not exceed jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT), save it in
+ * engine->props. If execlists.pending[0] is set at that moment, the
+ * preempt timer is re-armed with the new value.
+ *
+ * Returns @count on success, the kstrtoull() error for unparsable input,
+ * or -EINVAL if the value is too large.
+ */
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long ms;
+	int ret;
+
+	/*
+	 * After initialising a preemption request, we give the current
+	 * resident a small amount of time to vacate the GPU. The preemption
+	 * request is for a higher priority context and should be immediate
+	 * to maintain high quality of service (and avoid priority
+	 * inversion). However, the preemption granularity of the GPU can be
+	 * quite coarse and so we need a compromise.
+	 */
+
+	ret = kstrtoull(buf, 0, &ms);
+	if (ret)
+		return ret;
+
+	if (ms > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.preempt_timeout_ms, ms);
+
+	if (READ_ONCE(engine->execlists.pending[0]))
+		set_timer_ms(&engine->execlists.preempt, ms);
+
+	return count;
+}
+
+/* sysfs show for "preempt_timeout_ms": the current engine->props value */
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+		     char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
+
+/* sysfs read handler: print the engine's defaults.preempt_timeout_ms. */
+static ssize_t
+preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->defaults.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL);
+
+/*
+ * sysfs write handler for heartbeat_interval_ms.
+ *
+ * Parses @buf as an unsigned integer, rejects values greater than or equal
+ * to jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT) with -EINVAL, and hands the
+ * value to intel_engine_set_heartbeat(). Returns @count on success or the
+ * first negative error encountered.
+ */
+static ssize_t
+heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long interval;
+	int ret;
+
+	/*
+	 * Periodic heartbeat pulses are how the health of the system is
+	 * monitored; they double as an opportunity for garbage collection.
+	 * A pulse that does not complete (a missed heartbeat) is taken to
+	 * mean the system is hung, and triggers an engine or full GPU reset.
+	 * Because preemption granularity can be very coarse, no single value
+	 * is optimal for every workload.
+	 */
+
+	ret = kstrtoull(buf, 0, &interval);
+	if (ret)
+		return ret;
+
+	if (interval >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	ret = intel_engine_set_heartbeat(engine, interval);
+
+	return ret ? ret : count;
+}
+
+/* sysfs read handler: print the engine's current props.heartbeat_interval_ms. */
+static ssize_t
+heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_attr =
+__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+
+/* sysfs read handler: print the engine's defaults.heartbeat_interval_ms. */
+static ssize_t
+heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->defaults.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_def =
+__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
+
+/*
+ * Release callback installed in kobj_engine_type: frees the memory behind
+ * @kobj.
+ *
+ * NOTE(review): the kobjects using this type are embedded as ->base in a
+ * kzalloc'ed struct kobj_engine, yet the kobject pointer itself is passed
+ * to kfree(). That is only correct if ->base is the first member of
+ * struct kobj_engine - confirm against the struct definition.
+ */
+static void kobj_engine_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
+/* kobject type shared by the per-engine and ".defaults" kobjects below. */
+static struct kobj_type kobj_engine_type = {
+ .release = kobj_engine_release,
+ .sysfs_ops = &kobj_sysfs_ops
+};
+
+/*
+ * Allocate a struct kobj_engine for @engine and add its kobject beneath
+ * @dir, named after engine->name.
+ *
+ * Returns the embedded kobject on success, or NULL if the allocation or
+ * kobject_add() fails (the reference is dropped again in the latter case).
+ */
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+	struct kobj_engine *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+	if (!node)
+		return NULL;
+
+	kobject_init(&node->base, &kobj_engine_type);
+	node->engine = engine;
+
+	/* xfer ownership to sysfs tree */
+	if (!kobject_add(&node->base, dir, "%s", engine->name))
+		return &node->base;
+
+	kobject_put(&node->base);
+	return NULL;
+}
+
+/*
+ * Create a ".defaults" child kobject under @parent for the same engine and
+ * populate it with the *_def attributes.
+ *
+ * Nothing is reported to the caller: on any failure the function simply
+ * stops adding further files.
+ */
+static void add_defaults(struct kobj_engine *parent)
+{
+	static const struct attribute *files[] = {
+		&max_spin_def.attr,
+		&stop_timeout_def.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+		&heartbeat_interval_def.attr,
+#endif
+		NULL
+	};
+	struct intel_engine_cs *engine = parent->engine;
+	struct kobj_engine *defs;
+	struct kobject *kobj;
+
+	defs = kzalloc(sizeof(*defs), GFP_KERNEL);
+	if (!defs)
+		return;
+
+	kobj = &defs->base;
+	kobject_init(kobj, &kobj_engine_type);
+	defs->engine = engine;
+
+	if (kobject_add(kobj, &parent->base, "%s", ".defaults")) {
+		kobject_put(kobj);
+		return;
+	}
+
+	/* Files common to every engine. */
+	if (sysfs_create_files(kobj, files))
+		return;
+
+	/* Files that depend on what the engine is capable of. */
+	if (intel_engine_has_timeslices(engine) &&
+	    sysfs_create_file(kobj, &timeslice_duration_def.attr))
+		return;
+
+	if (intel_engine_has_preempt_reset(engine) &&
+	    sysfs_create_file(kobj, &preempt_timeout_def.attr))
+		return;
+}
+
+/*
+ * Create an "engine" directory under the primary DRM device's kobject and
+ * add one child kobject per uabi engine, each populated with the attribute
+ * files listed below plus the capability-dependent ones.
+ *
+ * Nothing is returned: if the directory cannot be created the function
+ * returns silently, and the first per-engine failure is logged with
+ * dev_err() and stops the loop, leaving earlier engines in place.
+ */
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+ /* Attributes created for every engine. */
+ static const struct attribute *files[] = {
+ &name_attr.attr,
+ &class_attr.attr,
+ &inst_attr.attr,
+ &mmio_attr.attr,
+ &caps_attr.attr,
+ &all_caps_attr.attr,
+ &max_spin_attr.attr,
+ &stop_timeout_attr.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+ &heartbeat_interval_attr.attr,
+#endif
+ NULL
+ };
+
+ struct device *kdev = i915->drm.primary->kdev;
+ struct intel_engine_cs *engine;
+ struct kobject *dir;
+
+ dir = kobject_create_and_add("engine", &kdev->kobj);
+ if (!dir)
+ return;
+
+ for_each_uabi_engine(engine, i915) {
+ struct kobject *kobj;
+
+ kobj = kobj_engine(dir, engine);
+ if (!kobj)
+ goto err_engine;
+
+ if (sysfs_create_files(kobj, files))
+ goto err_object;
+
+ /*
+ * NOTE(review): the two failures below jump to err_engine and so
+ * skip the kobject_put() that err_object performs, unlike the
+ * sysfs_create_files() failure above - confirm this is intended.
+ */
+ if (intel_engine_has_timeslices(engine) &&
+ sysfs_create_file(kobj, &timeslice_duration_attr.attr))
+ goto err_engine;
+
+ if (intel_engine_has_preempt_reset(engine) &&
+ sysfs_create_file(kobj, &preempt_timeout_attr.attr))
+ goto err_engine;
+
+ add_defaults(container_of(kobj, struct kobj_engine, base));
+
+ /*
+ * Error handling lives inside an if (0) block so that it is only
+ * ever reached through the gotos above, never by falling through
+ * on the success path.
+ */
+ if (0) {
+err_object:
+ kobject_put(kobj);
+err_engine:
+ dev_err(kdev, "Failed to add sysfs engine '%s'\n",
+ engine->name);
+ break;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.h b/drivers/gpu/drm/i915/gt/sysfs_engines.h
new file mode 100644
index 000000000000..9546fffe03a7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_SYSFS_H
+#define INTEL_ENGINE_SYSFS_H
+
+struct drm_i915_private;
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_SYSFS_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index c4c1523da7a6..861657897c0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -169,7 +169,7 @@ void intel_guc_init_early(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
- intel_guc_fw_init_early(guc);
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
@@ -207,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
u32 flags = 0;
- if (!intel_guc_is_submission_supported(guc))
+ if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;
return flags;
@@ -217,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
{
u32 flags = 0;
- if (intel_guc_is_submission_supported(guc)) {
+ if (intel_guc_submission_is_used(guc)) {
u32 ctxnum, base;
base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
@@ -333,7 +333,7 @@ int intel_guc_init(struct intel_guc *guc)
ret = intel_uc_fw_init(&guc->fw);
if (ret)
- goto err_fetch;
+ goto out;
ret = intel_guc_log_create(&guc->log);
if (ret)
@@ -348,7 +348,7 @@ int intel_guc_init(struct intel_guc *guc)
if (ret)
goto err_ads;
- if (intel_guc_is_submission_supported(guc)) {
+ if (intel_guc_submission_is_used(guc)) {
/*
* This is stuff we need to have available at fw load time
* if we are planning to enable submission later
@@ -364,6 +364,8 @@ int intel_guc_init(struct intel_guc *guc)
/* We need to notify the guc whenever we change the GGTT */
i915_ggtt_enable_guc(gt->ggtt);
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
return 0;
err_ct:
@@ -374,9 +376,8 @@ err_log:
intel_guc_log_destroy(&guc->log);
err_fw:
intel_uc_fw_fini(&guc->fw);
-err_fetch:
- intel_uc_fw_cleanup_fetch(&guc->fw);
- DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret);
+out:
+ i915_probe_error(gt->i915, "failed with %d\n", ret);
return ret;
}
@@ -384,12 +385,12 @@ void intel_guc_fini(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- if (!intel_uc_fw_is_available(&guc->fw))
+ if (!intel_uc_fw_is_loadable(&guc->fw))
return;
i915_ggtt_disable_guc(gt->ggtt);
- if (intel_guc_is_submission_supported(guc))
+ if (intel_guc_submission_is_used(guc))
intel_guc_submission_fini(guc);
intel_guc_ct_fini(&guc->ct);
@@ -397,9 +398,6 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_ads_destroy(guc);
intel_guc_log_destroy(&guc->log);
intel_uc_fw_fini(&guc->fw);
- intel_uc_fw_cleanup_fetch(&guc->fw);
-
- intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
}
/*
@@ -544,7 +542,7 @@ int intel_guc_suspend(struct intel_guc *guc)
* If GuC communication is enabled but submission is not supported,
* we do not need to suspend the GuC.
*/
- if (!intel_guc_submission_is_enabled(guc))
+ if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
return 0;
/*
@@ -609,7 +607,7 @@ int intel_guc_resume(struct intel_guc *guc)
* we do not need to resume the GuC but we do need to enable the
* GuC communication on resume (above).
*/
- if (!intel_guc_submission_is_enabled(guc))
+ if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
return 0;
return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -725,3 +723,47 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return 0;
}
+
+/**
+ * intel_guc_load_status - dump information about GuC load status
+ * @guc: the GuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC load status. Prints a one-line notice and returns
+ * early if the GuC is not supported or not wanted; otherwise dumps the
+ * firmware description, the decoded GUC_STATUS register and the 16 soft
+ * scratch registers while holding a runtime-pm wakeref.
+ */
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_uncore *uncore = gt->uncore;
+	intel_wakeref_t wakeref;
+
+	if (!intel_guc_is_supported(guc)) {
+		drm_printf(p, "GuC not supported\n");
+		return;
+	}
+
+	if (!intel_guc_is_wanted(guc)) {
+		drm_printf(p, "GuC disabled\n");
+		return;
+	}
+
+	intel_uc_fw_dump(&guc->fw, p);
+
+	with_intel_runtime_pm(uncore->rpm, wakeref) {
+		u32 status = intel_uncore_read(uncore, GUC_STATUS);
+		u32 i;
+
+		drm_printf(p, "\nGuC status 0x%08x:\n", status);
+		drm_printf(p, "\tBootrom status = 0x%x\n",
+			   (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+		drm_printf(p, "\tuKernel status = 0x%x\n",
+			   (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+		drm_printf(p, "\tMIA Core status = 0x%x\n",
+			   (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+		drm_puts(p, "\nScratch registers:\n");
+		for (i = 0; i < 16; i++) {
+			/* i is a u32, so use an unsigned conversion. */
+			drm_printf(p, "\t%2u: \t0x%x\n",
+				   i, intel_uncore_read(uncore, SOFT_SCRATCH(i)));
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 668b067b71e2..e84ab67b317d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -39,7 +39,7 @@ struct intel_guc {
void (*disable)(struct intel_guc *guc);
} interrupts;
- bool submission_supported;
+ bool submission_selected;
struct i915_vma *ads_vma;
struct __guc_ads_blob *ads_blob;
@@ -74,6 +74,11 @@ struct intel_guc {
struct mutex send_mutex;
};
+/* Map an embedded struct intel_guc_log back to the intel_guc containing it. */
+static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
+{
+ return container_of(log, struct intel_guc, log);
+}
static
inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
{
@@ -143,11 +148,17 @@ static inline bool intel_guc_is_supported(struct intel_guc *guc)
return intel_uc_fw_is_supported(&guc->fw);
}
-static inline bool intel_guc_is_enabled(struct intel_guc *guc)
+static inline bool intel_guc_is_wanted(struct intel_guc *guc)
{
return intel_uc_fw_is_enabled(&guc->fw);
}
+/*
+ * Report whether the GuC firmware is available, as given by
+ * intel_uc_fw_is_available().
+ *
+ * Calling this while the firmware status is still INTEL_UC_FIRMWARE_SELECTED
+ * trips the GEM_BUG_ON; presumably the answer is not yet meaningful at that
+ * stage - confirm against the firmware status state machine.
+ */
+static inline bool intel_guc_is_used(struct intel_guc *guc)
+{
+ GEM_BUG_ON(__intel_uc_fw_status(&guc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+ return intel_uc_fw_is_available(&guc->fw);
+}
+
static inline bool intel_guc_is_fw_running(struct intel_guc *guc)
{
return intel_uc_fw_is_running(&guc->fw);
@@ -167,11 +178,6 @@ static inline int intel_guc_sanitize(struct intel_guc *guc)
return 0;
}
-static inline bool intel_guc_is_submission_supported(struct intel_guc *guc)
-{
- return guc->submission_supported;
-}
-
static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask)
{
spin_lock_irq(&guc->irq_lock);
@@ -189,4 +195,6 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine);
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
new file mode 100644
index 000000000000..fe7cb7b29a1e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_debugfs.h"
+#include "intel_guc_log_debugfs.h"
+
+/*
+ * seq_file show callback for "guc_info": prints the GuC load status
+ * followed by the GuC log statistics. Returns -ENODEV when the GuC is not
+ * supported, 0 otherwise.
+ */
+static int guc_info_show(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct intel_guc *guc = m->private;
+
+	if (intel_guc_is_supported(guc)) {
+		intel_guc_load_status(guc, &printer);
+		drm_puts(&printer, "\n");
+		intel_guc_log_info(&guc->log, &printer);
+
+		/* Add more as required ... */
+
+		return 0;
+	}
+
+	return -ENODEV;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
+
+/*
+ * Register the GuC debugfs files (and those of its log) under @root.
+ * Does nothing when the GuC is not supported.
+ */
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
+{
+	static const struct debugfs_gt_file files[] = {
+		{ "guc_info", &guc_info_fops, NULL },
+	};
+
+	if (intel_guc_is_supported(guc)) {
+		intel_gt_debugfs_register_files(root, files,
+						ARRAY_SIZE(files), guc);
+		intel_guc_log_debugfs_register(&guc->log, root);
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
new file mode 100644
index 000000000000..424c26665cf1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_H
+#define DEBUGFS_GUC_H
+
+struct intel_guc;
+struct dentry;
+
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root);
+
+#endif /* DEBUGFS_GUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 3a1c47d600ea..d4a87f4c9421 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -13,20 +13,6 @@
#include "intel_guc_fw.h"
#include "i915_drv.h"
-/**
- * intel_guc_fw_init_early() - initializes GuC firmware struct
- * @guc: intel_guc struct
- *
- * On platforms with GuC selects firmware for uploading
- */
-void intel_guc_fw_init_early(struct intel_guc *guc)
-{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
-
- intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
static void guc_prepare_xfer(struct intel_uncore *uncore)
{
u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
index b5ab639d7259..0b4d2a9c9435 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
@@ -8,7 +8,6 @@
struct intel_guc;
-void intel_guc_fw_init_early(struct intel_guc *guc);
int intel_guc_fw_upload(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index caed0d57e704..fb10f3597ea5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -55,11 +55,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
-static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
-{
- return container_of(log, struct intel_guc, log);
-}
-
static void guc_log_enable_flush_events(struct intel_guc_log *log)
{
intel_guc_enable_msg(log_to_guc(log),
@@ -672,3 +667,95 @@ void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
{
queue_work(system_highpri_wq, &log->relay.flush_work);
}
+
+/*
+ * Short printable name for a GuC log buffer type. Unknown types are
+ * reported through MISSING_CASE() and yield an empty string.
+ */
+static const char *
+stringify_guc_log_type(enum guc_log_buffer_type type)
+{
+	const char *name = "";
+
+	switch (type) {
+	case GUC_ISR_LOG_BUFFER:
+		name = "ISR";
+		break;
+	case GUC_DPC_LOG_BUFFER:
+		name = "DPC";
+		break;
+	case GUC_CRASH_DUMP_LOG_BUFFER:
+		name = "CRASH";
+		break;
+	default:
+		MISSING_CASE(type);
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * intel_guc_log_info - dump information about GuC log relay
+ * @log: the GuC log
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC log info: the relay full count followed by the
+ * flush and sampled overflow counters of every log buffer type. Prints a
+ * single notice instead when the log relay has not been created.
+ */
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p)
+{
+	enum guc_log_buffer_type type = GUC_ISR_LOG_BUFFER;
+
+	if (!intel_guc_log_relay_created(log)) {
+		drm_puts(p, "GuC log relay not created\n");
+		return;
+	}
+
+	drm_puts(p, "GuC logging stats:\n");
+
+	drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count);
+
+	while (type < GUC_MAX_LOG_BUFFER) {
+		drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n",
+			   stringify_guc_log_type(type),
+			   log->stats[type].flush,
+			   log->stats[type].sampled_overflow);
+		type++;
+	}
+}
+
+/**
+ * intel_guc_log_dump - dump the contents of the GuC log
+ * @log: the GuC log
+ * @p: the &drm_printer
+ * @dump_load_err: dump the log saved on GuC load error
+ *
+ * Pretty printer for the GuC log. The selected object is pinned and printed
+ * as rows of four 32-bit words.
+ *
+ * Return: -ENODEV if the GuC is not supported, 0 if there is no log object
+ * to dump or the dump succeeded, or the error from pinning the object.
+ */
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+		       bool dump_load_err)
+{
+	struct intel_guc *guc = log_to_guc(log);
+	struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+	struct drm_i915_gem_object *obj = NULL;
+	size_t i, count;
+	u32 *map;
+
+	if (!intel_guc_is_supported(guc))
+		return -ENODEV;
+
+	if (dump_load_err)
+		obj = uc->load_err_log;
+	else if (log->vma)
+		obj = log->vma->obj;
+
+	if (!obj)
+		return 0;
+
+	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(map)) {
+		DRM_DEBUG("Failed to pin object\n");
+		drm_puts(p, "(log data unaccessible)\n");
+		return PTR_ERR(map);
+	}
+
+	/*
+	 * Each row consumes four words, so bound the loop on the last word
+	 * read rather than the first; this never reads past the mapping even
+	 * if the object size were not a multiple of 16 bytes.
+	 */
+	count = obj->base.size / sizeof(u32);
+	for (i = 0; i + 3 < count; i += 4)
+		drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+			   map[i], map[i + 1], map[i + 2], map[i + 3]);
+
+	drm_puts(p, "\n");
+
+	i915_gem_object_unpin_map(obj);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index c252c022c5fc..11fccd0b2294 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -79,4 +79,8 @@ static inline u32 intel_guc_log_get_level(struct intel_guc_log *log)
return log->level;
}
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p);
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+ bool dump_load_err);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
new file mode 100644
index 000000000000..129e0cf7dfe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_log.h"
+#include "intel_guc_log_debugfs.h"
+
+/* seq_file show callback for "guc_log_dump": dump the live GuC log. */
+static int guc_log_dump_show(struct seq_file *m, void *data)
+{
+	struct intel_guc_log *log = m->private;
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	return intel_guc_log_dump(log, &printer, false);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+
+/*
+ * seq_file show callback for "guc_load_err_log_dump": dump the log saved
+ * on GuC load error.
+ */
+static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
+{
+	struct intel_guc_log *log = m->private;
+	struct drm_printer printer = drm_seq_file_printer(m);
+
+	return intel_guc_log_dump(log, &printer, true);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+
+/*
+ * Getter for "guc_log_level": stores the current log level in @val.
+ * Returns -ENODEV (leaving @val untouched) when the GuC is not in use.
+ */
+static int guc_log_level_get(void *data, u64 *val)
+{
+	struct intel_guc_log *log = data;
+
+	if (intel_guc_is_used(log_to_guc(log))) {
+		*val = intel_guc_log_get_level(log);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Setter for "guc_log_level": forwards @val to intel_guc_log_set_level().
+ * Returns -ENODEV when the GuC is not in use.
+ */
+static int guc_log_level_set(void *data, u64 val)
+{
+	struct intel_guc_log *log = data;
+
+	return intel_guc_is_used(log_to_guc(log)) ?
+		intel_guc_log_set_level(log, val) : -ENODEV;
+}
+
+/*
+ * The get/set callbacks exchange an unsigned u64, so format it with an
+ * unsigned conversion.
+ */
+DEFINE_SIMPLE_ATTRIBUTE(guc_log_level_fops,
+			guc_log_level_get, guc_log_level_set,
+			"%llu\n");
+
+/*
+ * open() for "guc_log_relay": stashes the log in file->private_data and
+ * opens the relay. Fails with -ENODEV if the GuC is not ready.
+ */
+static int guc_log_relay_open(struct inode *inode, struct file *file)
+{
+	struct intel_guc_log *log = inode->i_private;
+	struct intel_guc *guc = log_to_guc(log);
+
+	if (!intel_guc_is_ready(guc))
+		return -ENODEV;
+
+	file->private_data = log;
+
+	return intel_guc_log_relay_open(log);
+}
+
+/*
+ * write() for "guc_log_relay": parses an integer from user space.
+ * Returns @cnt on success, or a negative error from parsing or from
+ * starting the relay.
+ */
+static ssize_t
+guc_log_relay_write(struct file *filp,
+		    const char __user *ubuf,
+		    size_t cnt,
+		    loff_t *ppos)
+{
+	struct intel_guc_log *log = filp->private_data;
+	int val, ret;
+
+	ret = kstrtoint_from_user(ubuf, cnt, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * A value of 1 enables and starts the guc log relay;
+	 * any other value flushes it.
+	 */
+	if (val == 1)
+		ret = intel_guc_log_relay_start(log);
+	else
+		intel_guc_log_relay_flush(log);
+
+	if (ret)
+		return ret;
+
+	return cnt;
+}
+
+/* release() for "guc_log_relay": closes the relay; always returns 0. */
+static int guc_log_relay_release(struct inode *inode, struct file *file)
+{
+	intel_guc_log_relay_close(inode->i_private);
+
+	return 0;
+}
+
+/*
+ * File operations for the "guc_log_relay" debugfs file; only open, write
+ * and release are provided.
+ */
+static const struct file_operations guc_log_relay_fops = {
+ .owner = THIS_MODULE,
+ .open = guc_log_relay_open,
+ .write = guc_log_relay_write,
+ .release = guc_log_relay_release,
+};
+
+/*
+ * Register the GuC log debugfs files under @root, with @log as their
+ * private data. Does nothing when the GuC is not supported.
+ */
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+				    struct dentry *root)
+{
+	static const struct debugfs_gt_file files[] = {
+		{ "guc_log_dump", &guc_log_dump_fops, NULL },
+		{ "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL },
+		{ "guc_log_level", &guc_log_level_fops, NULL },
+		{ "guc_log_relay", &guc_log_relay_fops, NULL },
+	};
+
+	if (intel_guc_is_supported(log_to_guc(log)))
+		intel_gt_debugfs_register_files(root, files,
+						ARRAY_SIZE(files), log);
+}
+
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
new file mode 100644
index 000000000000..e8900e3d74ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_LOG_H
+#define DEBUGFS_GUC_LOG_H
+
+struct intel_guc_log;
+struct dentry;
+
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+ struct dentry *root);
+
+#endif /* DEBUGFS_GUC_LOG_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9e42324fdecd..94eb63f309ce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+ u32 ctx_desc = rq->context->lrc.ccid;
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
guc_wq_item_append(guc, engine->guc_id, ctx_desc,
@@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine,
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority | __NO_PREEMPTION;
+ return rq->sched.attr.priority;
}
static struct i915_request *schedule_in(struct i915_request *rq, int idx)
@@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
+ i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
@@ -660,12 +658,9 @@ void intel_guc_submission_disable(struct intel_guc *guc)
guc_proc_desc_fini(guc);
}
-static bool __guc_submission_support(struct intel_guc *guc)
+static bool __guc_submission_selected(struct intel_guc *guc)
{
- /* XXX: GuC submission is unavailable for now */
- return false;
-
- if (!intel_guc_is_supported(guc))
+ if (!intel_guc_submission_is_supported(guc))
return false;
return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
@@ -673,7 +668,7 @@ static bool __guc_submission_support(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
- guc->submission_supported = __guc_submission_support(guc);
+ guc->submission_selected = __guc_submission_selected(guc);
}
bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index e402a2932592..4cf9d3e50263 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -8,7 +8,8 @@
#include <linux/types.h>
-struct intel_guc;
+#include "intel_guc.h"
+
struct intel_engine_cs;
void intel_guc_submission_init_early(struct intel_guc *guc);
@@ -20,4 +21,20 @@ int intel_guc_preempt_work_create(struct intel_guc *guc);
void intel_guc_preempt_work_destroy(struct intel_guc *guc);
bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
+/* Currently hard-wired to false; @guc is not consulted. */
+static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
+{
+ /* XXX: GuC submission is unavailable for now */
+ return false;
+}
+
+/* Return the guc->submission_selected flag. */
+static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc)
+{
+ return guc->submission_selected;
+}
+
+/* True only when the GuC itself is used and GuC submission is wanted. */
+static inline bool intel_guc_submission_is_used(struct intel_guc *guc)
+{
+ return intel_guc_is_used(guc) && intel_guc_submission_is_wanted(guc);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 32a069841c14..65eeb44b397d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -41,7 +41,7 @@ void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
- intel_huc_fw_init_early(huc);
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
if (INTEL_GEN(i915) >= 11) {
huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
@@ -121,19 +121,20 @@ int intel_huc_init(struct intel_huc *huc)
if (err)
goto out_fini;
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
return 0;
out_fini:
intel_uc_fw_fini(&huc->fw);
out:
- intel_uc_fw_cleanup_fetch(&huc->fw);
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err);
+ i915_probe_error(i915, "failed with %d\n", err);
return err;
}
void intel_huc_fini(struct intel_huc *huc)
{
- if (!intel_uc_fw_is_available(&huc->fw))
+ if (!intel_uc_fw_is_loadable(&huc->fw))
return;
intel_huc_rsa_data_destroy(huc);
@@ -199,9 +200,13 @@ fail:
* This function reads status register to verify if HuC
* firmware was successfully loaded.
*
- * Returns: 1 if HuC firmware is loaded and verified,
- * 0 if HuC firmware is not loaded and -ENODEV if HuC
- * is not present on this platform.
+ * Returns:
+ * * -ENODEV if HuC is not present on this platform,
+ * * -EOPNOTSUPP if HuC firmware is disabled,
+ * * -ENOPKG if HuC firmware was not installed,
+ * * -ENOEXEC if HuC firmware is invalid or mismatched,
+ * * 0 if HuC firmware is not running,
+ * * 1 if HuC firmware is authenticated and running.
*/
int intel_huc_check_status(struct intel_huc *huc)
{
@@ -209,11 +214,50 @@ int intel_huc_check_status(struct intel_huc *huc)
intel_wakeref_t wakeref;
u32 status = 0;
- if (!intel_huc_is_supported(huc))
+ switch (__intel_uc_fw_status(&huc->fw)) {
+ case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
+ case INTEL_UC_FIRMWARE_DISABLED:
+ return -EOPNOTSUPP;
+ case INTEL_UC_FIRMWARE_MISSING:
+ return -ENOPKG;
+ case INTEL_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+ default:
+ break;
+ }
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
return (status & huc->status.mask) == huc->status.value;
}
+
+/**
+ * intel_huc_load_status - dump information about HuC load status
+ * @huc: the HuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for HuC load status. Prints a one-line notice and returns
+ * early if the HuC is not supported or not wanted; otherwise dumps the
+ * firmware description and the raw HuC status register, read while holding
+ * a runtime-pm wakeref.
+ */
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p)
+{
+	struct intel_gt *gt = huc_to_gt(huc);
+	intel_wakeref_t wakeref;
+
+	if (!intel_huc_is_supported(huc)) {
+		drm_printf(p, "HuC not supported\n");
+		return;
+	}
+
+	if (!intel_huc_is_wanted(huc)) {
+		drm_printf(p, "HuC disabled\n");
+		return;
+	}
+
+	intel_uc_fw_dump(&huc->fw, p);
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+		drm_printf(p, "HuC status: 0x%08x\n",
+			   intel_uncore_read(gt->uncore, huc->status.reg));
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 644c059fe01d..daee43b661d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,14 +41,22 @@ static inline bool intel_huc_is_supported(struct intel_huc *huc)
return intel_uc_fw_is_supported(&huc->fw);
}
-static inline bool intel_huc_is_enabled(struct intel_huc *huc)
+static inline bool intel_huc_is_wanted(struct intel_huc *huc)
{
return intel_uc_fw_is_enabled(&huc->fw);
}
+/*
+ * Report whether the HuC firmware is available, as given by
+ * intel_uc_fw_is_available().
+ *
+ * Calling this while the firmware status is still INTEL_UC_FIRMWARE_SELECTED
+ * trips the GEM_BUG_ON; presumably the answer is not yet meaningful at that
+ * stage - confirm against the firmware status state machine.
+ */
+static inline bool intel_huc_is_used(struct intel_huc *huc)
+{
+ GEM_BUG_ON(__intel_uc_fw_status(&huc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+ return intel_uc_fw_is_available(&huc->fw);
+}
+
static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
{
return intel_uc_fw_is_running(&huc->fw);
}
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
new file mode 100644
index 000000000000..5733c15fd123
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_huc.h"
+#include "intel_huc_debugfs.h"
+
+/*
+ * seq_file show callback for "huc_info": prints the HuC load status.
+ * Returns -ENODEV when the HuC is not supported, 0 otherwise.
+ */
+static int huc_info_show(struct seq_file *m, void *data)
+{
+	struct drm_printer printer = drm_seq_file_printer(m);
+	struct intel_huc *huc = m->private;
+
+	if (intel_huc_is_supported(huc)) {
+		intel_huc_load_status(huc, &printer);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info);
+
+/*
+ * Register the HuC debugfs files under @root, with @huc as their private
+ * data. Does nothing when the HuC is not supported.
+ */
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root)
+{
+	static const struct debugfs_gt_file files[] = {
+		{ "huc_info", &huc_info_fops, NULL },
+	};
+
+	if (intel_huc_is_supported(huc))
+		intel_gt_debugfs_register_files(root, files,
+						ARRAY_SIZE(files), huc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
new file mode 100644
index 000000000000..be79e992f976
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_HUC_H
+#define DEBUGFS_HUC_H
+
+struct intel_huc;
+struct dentry;
+
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root);
+
+#endif /* DEBUGFS_HUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index eee193bf2cc4..e5ef509c70e8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,23 +8,6 @@
#include "i915_drv.h"
/**
- * intel_huc_fw_init_early() - initializes HuC firmware struct
- * @huc: intel_huc struct
- *
- * On platforms with HuC selects firmware for uploading
- */
-void intel_huc_fw_init_early(struct intel_huc *huc)
-{
- struct intel_gt *gt = huc_to_gt(huc);
- struct intel_uc *uc = &gt->uc;
- struct drm_i915_private *i915 = gt->i915;
-
- intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
- intel_uc_uses_guc(uc),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
-/**
* intel_huc_fw_upload() - load HuC uCode to device
* @huc: intel_huc structure
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index b791269ce923..12f264ee3e0b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -8,7 +8,6 @@
struct intel_huc;
-void intel_huc_fw_init_early(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index affc4d6f9ead..f518fe05c6f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -45,43 +45,43 @@ static void __confirm_options(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
- "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
- i915_modparams.enable_guc,
- yesno(intel_uc_uses_guc(uc)),
- yesno(intel_uc_uses_guc_submission(uc)),
- yesno(intel_uc_uses_huc(uc)));
+ drm_dbg(&i915->drm,
+ "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
+ i915_modparams.enable_guc,
+ yesno(intel_uc_wants_guc(uc)),
+ yesno(intel_uc_wants_guc_submission(uc)),
+ yesno(intel_uc_wants_huc(uc)));
if (i915_modparams.enable_guc == -1)
return;
if (i915_modparams.enable_guc == 0) {
- GEM_BUG_ON(intel_uc_uses_guc(uc));
- GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
- GEM_BUG_ON(intel_uc_uses_huc(uc));
+ GEM_BUG_ON(intel_uc_wants_guc(uc));
+ GEM_BUG_ON(intel_uc_wants_guc_submission(uc));
+ GEM_BUG_ON(intel_uc_wants_huc(uc));
return;
}
if (!intel_uc_supports_guc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
!intel_uc_supports_huc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "HuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC submission is N/A");
if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
ENABLE_GUC_LOAD_HUC))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "undocumented flag");
}
@@ -93,7 +93,7 @@ void intel_uc_init_early(struct intel_uc *uc)
__confirm_options(uc);
- if (intel_uc_uses_guc(uc))
+ if (intel_uc_wants_guc(uc))
uc->ops = &uc_ops_on;
else
uc->ops = &uc_ops_off;
@@ -131,6 +131,13 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
i915_gem_object_put(log);
}
+/*
+ * Tear down the uC state for @uc: calls intel_uc_fini_hw() and then
+ * intel_uc_fini(), and finally drops the load error log through
+ * __uc_free_load_err_log().
+ */
+void intel_uc_driver_remove(struct intel_uc *uc)
+{
+ intel_uc_fini_hw(uc);
+ intel_uc_fini(uc);
+ __uc_free_load_err_log(uc);
+}
+
static inline bool guc_communication_enabled(struct intel_guc *guc)
{
return intel_guc_ct_enabled(&guc->ct);
@@ -257,13 +264,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
{
int err;
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
err = intel_uc_fw_fetch(&uc->guc.fw);
if (err)
return;
- if (intel_uc_uses_huc(uc))
+ if (intel_uc_wants_huc(uc))
intel_uc_fw_fetch(&uc->huc.fw);
}
@@ -273,33 +280,44 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc)
intel_uc_fw_cleanup_fetch(&uc->guc.fw);
}
-static void __uc_init(struct intel_uc *uc)
+static int __uc_init(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret;
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
+
+ if (!intel_uc_uses_guc(uc))
+ return 0;
+
+ if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
+ return -ENOMEM;
/* XXX: GuC submission is unavailable for now */
- GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
+ GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
ret = intel_guc_init(guc);
- if (ret) {
- intel_uc_fw_cleanup_fetch(&huc->fw);
- return;
+ if (ret)
+ return ret;
+
+ if (intel_uc_uses_huc(uc)) {
+ ret = intel_huc_init(huc);
+ if (ret)
+ goto out_guc;
}
- if (intel_uc_uses_huc(uc))
- intel_huc_init(huc);
+ return 0;
+
+out_guc:
+ intel_guc_fini(guc);
+ return ret;
}
static void __uc_fini(struct intel_uc *uc)
{
intel_huc_fini(&uc->huc);
intel_guc_fini(&uc->guc);
-
- __uc_free_load_err_log(uc);
}
static int __uc_sanitize(struct intel_uc *uc)
@@ -402,12 +420,12 @@ static int __uc_init_hw(struct intel_uc *uc)
int ret, attempts;
GEM_BUG_ON(!intel_uc_supports_guc(uc));
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
- if (!intel_uc_fw_is_available(&guc->fw)) {
+ if (!intel_uc_fw_is_loadable(&guc->fw)) {
ret = __uc_check_hw(uc) ||
intel_uc_fw_is_overridden(&guc->fw) ||
- intel_uc_supports_guc_submission(uc) ?
+ intel_uc_wants_guc_submission(uc) ?
intel_uc_fw_status_to_error(guc->fw.status) : 0;
goto err_out;
}
@@ -459,17 +477,17 @@ static int __uc_init_hw(struct intel_uc *uc)
if (ret)
goto err_communication;
- if (intel_uc_supports_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_enable(guc);
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
guc->fw.major_ver_found, guc->fw.minor_ver_found,
"submission",
- enableddisabled(intel_uc_supports_guc_submission(uc)));
+ enableddisabled(intel_uc_uses_guc_submission(uc)));
if (intel_uc_uses_huc(uc)) {
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
huc->fw.path,
huc->fw.major_ver_found, huc->fw.minor_ver_found,
@@ -490,7 +508,7 @@ err_out:
__uc_sanitize(uc);
if (!ret) {
- dev_notice(i915->drm.dev, "GuC is uninitialized\n");
+ drm_notice(&i915->drm, "GuC is uninitialized\n");
/* We want to run without GuC submission */
return 0;
}
@@ -508,7 +526,7 @@ static void __uc_fini_hw(struct intel_uc *uc)
if (!intel_guc_is_fw_running(guc))
return;
- if (intel_uc_supports_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_disable(guc);
if (guc_communication_enabled(guc))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 49c913524686..9c954c589edf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -7,6 +7,7 @@
#define _INTEL_UC_H_
#include "intel_guc.h"
+#include "intel_guc_submission.h"
#include "intel_huc.h"
#include "i915_params.h"
@@ -16,7 +17,7 @@ struct intel_uc_ops {
int (*sanitize)(struct intel_uc *uc);
void (*init_fw)(struct intel_uc *uc);
void (*fini_fw)(struct intel_uc *uc);
- void (*init)(struct intel_uc *uc);
+ int (*init)(struct intel_uc *uc);
void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc);
@@ -33,6 +34,7 @@ struct intel_uc {
void intel_uc_init_early(struct intel_uc *uc);
void intel_uc_driver_late_release(struct intel_uc *uc);
+void intel_uc_driver_remove(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
@@ -40,35 +42,44 @@ void intel_uc_runtime_suspend(struct intel_uc *uc);
int intel_uc_resume(struct intel_uc *uc);
int intel_uc_runtime_resume(struct intel_uc *uc);
-static inline bool intel_uc_supports_guc(struct intel_uc *uc)
-{
- return intel_guc_is_supported(&uc->guc);
-}
-
-static inline bool intel_uc_uses_guc(struct intel_uc *uc)
-{
- return intel_guc_is_enabled(&uc->guc);
-}
+/*
+ * We need to know as early as possible if we're going to use GuC or not to
+ * take the correct setup paths. Additionally, once we've started loading the
+ * GuC, it is unsafe to keep executing without it because some parts of the HW,
+ * a subset of which is not cleaned on GT reset, will start expecting the GuC FW
+ * to be running.
+ * To solve both these requirements, we commit to using the microcontrollers if
+ * the relevant modparam is set and the blobs are found on the system. At this
+ * stage, the only thing that can stop us from attempting to load the blobs on
+ * the HW and use them is a fundamental issue (e.g. no memory for our
+ * structures); if we hit such a problem during driver load we're broken even
+ * without GuC, so there is no point in trying to fall back.
+ *
+ * Given the above, we can be in one of 4 states, with the last one implying
+ * we're committed to using the microcontroller:
+ * - Not supported: not available in HW and/or firmware not defined.
+ * - Supported: available in HW and firmware defined.
+ * - Wanted: supported + enabled in modparam.
+ * - In use: wanted + firmware found on the system and successfully fetched.
+ */
-static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc)
-{
- return intel_guc_is_submission_supported(&uc->guc);
+#define __uc_state_checker(x, func, state, required) \
+static inline bool intel_uc_##state##_##func(struct intel_uc *uc) \
+{ \
+ return intel_##func##_is_##required(&uc->x); \
}
-static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc)
-{
- return intel_guc_is_submission_supported(&uc->guc);
-}
+#define uc_state_checkers(x, func) \
+__uc_state_checker(x, func, supports, supported) \
+__uc_state_checker(x, func, wants, wanted) \
+__uc_state_checker(x, func, uses, used)
-static inline bool intel_uc_supports_huc(struct intel_uc *uc)
-{
- return intel_uc_supports_guc(uc);
-}
+uc_state_checkers(guc, guc);
+uc_state_checkers(huc, huc);
+uc_state_checkers(guc, guc_submission);
-static inline bool intel_uc_uses_huc(struct intel_uc *uc)
-{
- return intel_huc_is_enabled(&uc->huc);
-}
+#undef uc_state_checkers
+#undef __uc_state_checker
#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
@@ -80,7 +91,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
intel_uc_ops_function(sanitize, sanitize, int, 0);
intel_uc_ops_function(fetch_firmwares, init_fw, void, );
intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
-intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, );
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
new file mode 100644
index 000000000000..9d16b784aa0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "intel_guc_debugfs.h"
+#include "intel_huc_debugfs.h"
+#include "intel_uc.h"
+#include "intel_uc_debugfs.h"
+
+/*
+ * Create the "uc" debugfs directory under @gt_root and register the GuC and
+ * HuC entries inside it. Nothing is registered when @gt_root is NULL, when
+ * GuC is not supported, or when the directory cannot be created.
+ */
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
+{
+	struct dentry *uc_dir;
+
+	/*
+	 * GuC and HuC always come as a pair, so checking GuC support alone
+	 * covers both.
+	 */
+	if (!gt_root || !intel_uc_supports_guc(uc))
+		return;
+
+	uc_dir = debugfs_create_dir("uc", gt_root);
+	if (IS_ERR(uc_dir))
+		return;
+
+	intel_guc_debugfs_register(&uc->guc, uc_dir);
+	intel_huc_debugfs_register(&uc->huc, uc_dir);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
new file mode 100644
index 000000000000..010ce250d223
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_UC_H
+#define DEBUGFS_UC_H
+
+struct intel_uc;
+struct dentry;
+
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root);
+
+#endif /* DEBUGFS_UC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 8ee0a0c7f447..e1caae93996d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,26 +11,32 @@
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
-static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+static inline struct intel_gt *
+____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
{
- GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
- if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ if (type == INTEL_UC_FW_TYPE_GUC)
return container_of(uc_fw, struct intel_gt, uc.guc.fw);
- GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC);
+ GEM_BUG_ON(type != INTEL_UC_FW_TYPE_HUC);
return container_of(uc_fw, struct intel_gt, uc.huc.fw);
}
+/*
+ * Resolve the intel_gt that embeds @uc_fw, selecting the GuC or HuC member
+ * from the type stored in uc_fw->type. Asserts (GEM_BUG_ON) that the firmware
+ * status is not INTEL_UC_FIRMWARE_UNINITIALIZED before delegating to
+ * ____uc_fw_to_gt().
+ */
+static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+{
+ GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
+ return ____uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
#ifdef CONFIG_DRM_I915_DEBUG_GUC
void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
uc_fw->__status = status;
- DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev,
- "%s firmware -> %s\n",
- intel_uc_fw_type_repr(uc_fw->type),
- status == INTEL_UC_FIRMWARE_SELECTED ?
- uc_fw->path : intel_uc_fw_status_repr(status));
+ drm_dbg(&__uc_fw_to_gt(uc_fw)->i915->drm,
+ "%s firmware -> %s\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ status == INTEL_UC_FIRMWARE_SELECTED ?
+ uc_fw->path : intel_uc_fw_status_repr(status));
}
#endif
@@ -43,7 +49,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* features.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
@@ -187,17 +193,15 @@ static void __uc_fw_user_override(struct intel_uc_fw *uc_fw)
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
* @type: type of uC
- * @supported: is uC support possible
- * @platform: platform identifier
- * @rev: hardware revision
*
* Initialize the state of our uC object and relevant tracking and select the
* firmware to fetch and load.
*/
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev)
+ enum intel_uc_fw_type type)
{
+ struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915;
+
/*
* we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
* before we've looked at the HW caps to see if we have uc support
@@ -208,8 +212,10 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
uc_fw->type = type;
- if (supported) {
- __uc_fw_auto_select(uc_fw, platform, rev);
+ if (HAS_GT_UC(i915)) {
+ __uc_fw_auto_select(uc_fw,
+ INTEL_INFO(i915)->platform,
+ INTEL_REVID(i915));
__uc_fw_user_override(uc_fw);
}
@@ -279,7 +285,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
err = i915_inject_probe_error(i915, -ENXIO);
if (err)
- return err;
+ goto fail;
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
@@ -290,7 +296,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Check the size of the blob before examining buffer contents */
if (unlikely(fw->size < sizeof(struct uc_css_header))) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, sizeof(struct uc_css_header));
err = -ENODATA;
@@ -303,7 +309,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
css->exponent_size_dw) * sizeof(u32);
+ /* On mismatch, log the header size computed above, not the blob size. */
if (unlikely(size != sizeof(struct uc_css_header))) {
- dev_warn(dev,
+ drm_warn(&i915->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
- fw->size, sizeof(struct uc_css_header));
+ size, sizeof(struct uc_css_header));
@@ -316,7 +322,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* now RSA */
if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
- dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n",
+ drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
err = -EPROTO;
@@ -327,7 +333,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
if (unlikely(fw->size < size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, size);
err = -ENOEXEC;
@@ -337,7 +343,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Sanity check whether this fw is not larger than whole WOPCM memory */
size = __intel_uc_fw_get_upload_size(uc_fw);
if (unlikely(size >= i915->wopcm.size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
size, (size_t)i915->wopcm.size);
err = -E2BIG;
@@ -352,7 +358,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
- dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
uc_fw->major_ver_found, uc_fw->minor_ver_found,
uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
@@ -380,9 +386,9 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- dev_notice(dev, "%s firmware %s: fetch failed with error %d\n",
+ drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
- dev_info(dev, "%s firmware(s) can be downloaded from %s\n",
+ drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
release_firmware(fw); /* OK even if fw is NULL */
@@ -467,7 +473,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
/* Wait for DMA to finish */
ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100);
if (ret)
- dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ drm_err(&gt->i915->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
intel_uc_fw_type_repr(uc_fw->type),
intel_uncore_read_fw(uncore, DMA_CTRL));
@@ -501,7 +507,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
if (err)
return err;
- if (!intel_uc_fw_is_available(uc_fw))
+ if (!intel_uc_fw_is_loadable(uc_fw))
return -ENOEXEC;
/* Call custom loader */
@@ -544,7 +550,10 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
- intel_uc_fw_cleanup_fetch(uc_fw);
+ if (i915_gem_object_has_pinned_pages(uc_fw->obj))
+ i915_gem_object_unpin_pages(uc_fw->obj);
+
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 1f30543d0d2d..23d3a423ac0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -29,8 +29,11 @@ struct intel_gt;
* | | SELECTED |
* +------------+- / | \ -+
* | | MISSING <--/ | \--> ERROR |
- * | fetch | | |
- * | | /------> AVAILABLE <---<-----------\ |
+ * | fetch | V |
+ * | | AVAILABLE |
+ * +------------+- | -+
+ * | init | V |
+ * | | /------> LOADABLE <----<-----------\ |
* +------------+- \ / \ \ \ -+
* | | FAIL <--< \--> TRANSFERRED \ |
* | upload | \ / \ / |
@@ -46,6 +49,7 @@ enum intel_uc_fw_status {
INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
@@ -115,6 +119,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
return "ERROR";
case INTEL_UC_FIRMWARE_AVAILABLE:
return "AVAILABLE";
+ case INTEL_UC_FIRMWARE_LOADABLE:
+ return "LOADABLE";
case INTEL_UC_FIRMWARE_FAIL:
return "FAIL";
case INTEL_UC_FIRMWARE_TRANSFERRED:
@@ -143,6 +149,7 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
case INTEL_UC_FIRMWARE_SELECTED:
return -ESTALE;
case INTEL_UC_FIRMWARE_AVAILABLE:
+ case INTEL_UC_FIRMWARE_LOADABLE:
case INTEL_UC_FIRMWARE_TRANSFERRED:
case INTEL_UC_FIRMWARE_RUNNING:
return 0;
@@ -184,6 +191,11 @@ static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE;
}
+/*
+ * True when the firmware status is INTEL_UC_FIRMWARE_LOADABLE or any value
+ * that follows it in enum intel_uc_fw_status (FAIL, TRANSFERRED, RUNNING).
+ */
+static inline bool intel_uc_fw_is_loadable(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_LOADABLE;
+}
+
static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
{
return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED;
@@ -202,7 +214,7 @@ static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
{
if (intel_uc_fw_is_loaded(uc_fw))
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOADABLE);
}
static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
@@ -227,8 +239,7 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
}
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev);
+ enum intel_uc_fw_type type);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);