Diffstat (limited to 'drivers/gpu/drm/i915/gt')
94 files changed, 8810 insertions, 1552 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c index 6a5e9ab20b94..5e3725e62241 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_engines.c +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c @@ -32,5 +32,5 @@ void debugfs_engines_register(struct intel_gt *gt, struct dentry *root) { "engines", &engines_fops }, }; - debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); } diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c index 75255aaacaed..1de5fbaa1cf9 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -9,6 +9,7 @@ #include "debugfs_engines.h" #include "debugfs_gt.h" #include "debugfs_gt_pm.h" +#include "uc/intel_uc_debugfs.h" #include "i915_drv.h" void debugfs_gt_register(struct intel_gt *gt) @@ -24,17 +25,19 @@ void debugfs_gt_register(struct intel_gt *gt) debugfs_engines_register(gt, root); debugfs_gt_pm_register(gt, root); + + intel_uc_debugfs_register(>->uc, root); } -void debugfs_gt_register_files(struct intel_gt *gt, - struct dentry *root, - const struct debugfs_gt_file *files, - unsigned long count) +void intel_gt_debugfs_register_files(struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count, void *data) { while (count--) { - if (!files->eval || files->eval(gt)) + umode_t mode = files->fops->write ? 0644 : 0444; + if (!files->eval || files->eval(data)) debugfs_create_file(files->name, - 0444, root, gt, + mode, root, data, files->fops); files++; diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h index 4ea0f06cda8f..f77540f727e9 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.h +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h @@ -28,12 +28,11 @@ void debugfs_gt_register(struct intel_gt *gt); struct debugfs_gt_file { const char *name; const struct file_operations *fops; - bool (*eval)(const struct intel_gt *gt); + bool (*eval)(void *data); }; -void debugfs_gt_register_files(struct intel_gt *gt, - struct dentry *root, - const struct debugfs_gt_file *files, - unsigned long count); +void intel_gt_debugfs_register_files(struct dentry *root, + const struct debugfs_gt_file *files, + unsigned long count, void *data); #endif /* DEBUGFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 059c9e5c002e..174a24553322 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -10,6 +10,7 @@ #include "debugfs_gt_pm.h" #include "i915_drv.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_llc.h" #include "intel_rc6.h" #include "intel_rps.h" @@ -268,7 +269,7 @@ static int frequency_show(struct seq_file *m, void *unused) yesno(rpmodectl & GEN6_RP_ENABLE)); seq_printf(m, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); + GEN6_RP_MEDIA_SW_MODE)); vlv_punit_get(i915); freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); @@ -300,8 +301,9 @@ static int frequency_show(struct seq_file *m, void *unused) u32 rp_state_cap; u32 rpmodectl, rpinclimit, rpdeclimit; u32 rpstat, cagf, reqf; - u32 rpupei, rpcurup, rpprevup; - u32 rpdownei, rpcurdown, rpprevdown; + u32 rpcurupei, rpcurup, rpprevup; + u32 rpcurdownei, rpcurdown, rpprevdown; + u32 rpupei, rpupt, rpdownei, rpdownt; u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; int max_freq; @@ -334,12 +336,19 @@ static int 
frequency_show(struct seq_file *m, void *unused) rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + + rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); + rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + + rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + cagf = intel_rps_read_actual_frequency(rps); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); @@ -372,7 +381,7 @@ static int frequency_show(struct seq_file *m, void *unused) yesno(rpmodectl & GEN6_RP_ENABLE)); seq_printf(m, "SW control enabled: %s\n", yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_mask); @@ -394,23 +403,35 @@ static int frequency_show(struct seq_file *m, void *unused) seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%dus)\n", - rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei)); - seq_printf(m, "RP CUR UP: %d (%dus)\n", - rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%dus)\n", - rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup)); + seq_printf(m, "RP CUR UP EI: %d (%dns)\n", + rpcurupei, + intel_gt_pm_interval_to_ns(gt, rpcurupei)); + seq_printf(m, "RP CUR UP: %d (%dns)\n", + rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); + seq_printf(m, "RP PREV UP: %d (%dns)\n", + rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); seq_printf(m, "Up threshold: %d%%\n", rps->power.up_threshold); - - seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", - rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei)); - seq_printf(m, "RP CUR DOWN: %d (%dus)\n", - rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%dus)\n", - rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown)); + seq_printf(m, "RP UP EI: %d (%dns)\n", + rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); + seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n", + rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); + + seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", + rpcurdownei, + intel_gt_pm_interval_to_ns(gt, rpcurdownei)); + seq_printf(m, "RP CUR DOWN: %d (%dns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(gt, rpcurdown)); + seq_printf(m, "RP PREV DOWN: %d (%dns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(gt, rpprevdown)); seq_printf(m, "Down threshold: %d%%\n", rps->power.down_threshold); + seq_printf(m, "RP DOWN EI: %d (%dns)\n", + rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); + seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n", + rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); max_freq = (IS_GEN9_LP(i915) ? 
rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -506,8 +527,10 @@ static int llc_show(struct seq_file *m, void *data) return 0; } -static bool llc_eval(const struct intel_gt *gt) +static bool llc_eval(void *data) { + struct intel_gt *gt = data; + return HAS_LLC(gt->i915); } @@ -533,7 +556,8 @@ static int rps_boost_show(struct seq_file *m, void *data) struct drm_i915_private *i915 = gt->i915; struct intel_rps *rps = >->rps; - seq_printf(m, "RPS enabled? %d\n", rps->enabled); + seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); + seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); @@ -553,7 +577,7 @@ static int rps_boost_show(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) { + if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) { struct intel_uncore *uncore = gt->uncore; u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -580,8 +604,10 @@ static int rps_boost_show(struct seq_file *m, void *data) return 0; } -static bool rps_eval(const struct intel_gt *gt) +static bool rps_eval(void *data) { + struct intel_gt *gt = data; + return HAS_RPS(gt->i915); } @@ -597,5 +623,5 @@ void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root) { "rps_boost", &rps_boost_fops, rps_eval }, }; - debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files)); + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); } diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c new file mode 100644 index 000000000000..de595b66a746 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gen7_renderclear.h" +#include "i915_drv.h" +#include "intel_gpu_commands.h" + +#define MAX_URB_ENTRIES 64 +#define STATE_SIZE (4 * 1024) +#define GT3_INLINE_DATA_DELAYS 0x1E00 +#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) + +struct cb_kernel { + const void *data; + u32 size; +}; + +#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) } + +#include "ivb_clear_kernel.c" +static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel); + +#include "hsw_clear_kernel.c" +static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel); + +struct batch_chunk { + struct i915_vma *vma; + u32 offset; + u32 *start; + u32 *end; + u32 max_items; +}; + +struct batch_vals { + u32 max_primitives; + u32 max_urb_entries; + u32 cmd_size; + u32 state_size; + u32 state_start; + u32 batch_size; + u32 surface_height; + u32 surface_width; + u32 scratch_size; + u32 max_size; +}; + +static void +batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) +{ + if (IS_HASWELL(i915)) { + bv->max_primitives = 280; + bv->max_urb_entries = MAX_URB_ENTRIES; + bv->surface_height = 16 * 16; + bv->surface_width = 32 * 2 * 16; + } else { + bv->max_primitives = 128; + bv->max_urb_entries = MAX_URB_ENTRIES / 2; + bv->surface_height = 16 * 8; + bv->surface_width = 32 * 16; + } + bv->cmd_size = bv->max_primitives * 4096; + bv->state_size = STATE_SIZE; + bv->state_start = bv->cmd_size; + bv->batch_size = bv->cmd_size + bv->state_size; + bv->scratch_size = bv->surface_height * bv->surface_width; + bv->max_size = bv->batch_size + bv->scratch_size; +} + +static void batch_init(struct 
batch_chunk *bc, + struct i915_vma *vma, + u32 *start, u32 offset, u32 max_bytes) +{ + bc->vma = vma; + bc->offset = offset; + bc->start = start + bc->offset / sizeof(*bc->start); + bc->end = bc->start; + bc->max_items = max_bytes / sizeof(*bc->start); +} + +static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) +{ + return (cs - bc->start) * sizeof(*bc->start) + bc->offset; +} + +static u32 batch_addr(const struct batch_chunk *bc) +{ + return bc->vma->node.start; +} + +static void batch_add(struct batch_chunk *bc, const u32 d) +{ + GEM_BUG_ON((bc->end - bc->start) >= bc->max_items); + *bc->end++ = d; +} + +static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items) +{ + u32 *map; + + if (align) { + u32 *end = PTR_ALIGN(bc->end, align); + + memset32(bc->end, 0, end - bc->end); + bc->end = end; + } + + map = bc->end; + bc->end += items; + + return map; +} + +static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes) +{ + GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start))); + return batch_alloc_items(bc, align, bytes / sizeof(*bc->start)); +} + +static u32 +gen7_fill_surface_state(struct batch_chunk *state, + const u32 dst_offset, + const struct batch_vals *bv) +{ + u32 surface_h = bv->surface_height; + u32 surface_w = bv->surface_width; + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + +#define SURFACE_2D 1 +#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define RENDER_CACHE_READ_WRITE 1 + + *cs++ = SURFACE_2D << 29 | + (SURFACEFORMAT_B8G8R8A8_UNORM << 18) | + (RENDER_CACHE_READ_WRITE << 8); + + *cs++ = batch_addr(state) + dst_offset; + + *cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1); + *cs++ = surface_w; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; +#define SHADER_CHANNELS(r, g, b, a) \ + (((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16)) + *cs++ = SHADER_CHANNELS(4, 5, 6, 7); + batch_advance(state, cs); + + return offset; +} + +static u32 +gen7_fill_binding_table(struct batch_chunk *state, + const struct batch_vals *bv) +{ + u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + + *cs++ = surface_start - state->offset; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(state, cs); + + return offset; +} + +static u32 +gen7_fill_kernel_data(struct batch_chunk *state, + const u32 *data, + const u32 size) +{ + return batch_offset(state, + memcpy(batch_alloc_bytes(state, 64, size), + data, size)); +} + +static u32 +gen7_fill_interface_descriptor(struct batch_chunk *state, + const struct batch_vals *bv, + const struct cb_kernel *kernel, + unsigned int count) +{ + u32 kernel_offset = + gen7_fill_kernel_data(state, kernel->data, kernel->size); + u32 binding_table = gen7_fill_binding_table(state, bv); + u32 *cs = batch_alloc_items(state, 32, 8 * count); + u32 offset = batch_offset(state, cs); + + *cs++ = kernel_offset; + *cs++ = (1 << 7) | (1 << 13); + *cs++ = 0; + *cs++ = (binding_table - state->offset) | 1; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + /* 1 - 63dummy idds */ + memset32(cs, 0x00, (count - 1) * 8); + batch_advance(state, cs + (count - 1) * 8); + + return offset; +} + +static void +gen7_emit_state_base_address(struct batch_chunk *batch, + u32 surface_state_base) +{ + u32 *cs = batch_alloc_items(batch, 0, 12); + + *cs++ = STATE_BASE_ADDRESS | (12 - 2); + /* general */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* surface 
*/ + *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; + /* dynamic */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* indirect */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* instruction */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + + /* general/dynamic/indirect/instruction access Bound */ + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + +static void +gen7_emit_vfe_state(struct batch_chunk *batch, + const struct batch_vals *bv, + u32 urb_size, u32 curbe_size, + u32 mode) +{ + u32 urb_entries = bv->max_urb_entries; + u32 threads = bv->max_primitives - 1; + u32 *cs = batch_alloc_items(batch, 32, 8); + + *cs++ = MEDIA_VFE_STATE | (8 - 2); + + /* scratch buffer */ + *cs++ = 0; + + /* number of threads & urb entries for GPGPU vs Media Mode */ + *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + + *cs++ = 0; + + /* urb entry size & curbe size in 256 bits unit */ + *cs++ = urb_size << 16 | curbe_size; + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + +static void +gen7_emit_interface_descriptor_load(struct batch_chunk *batch, + const u32 interface_descriptor, + unsigned int count) +{ + u32 *cs = batch_alloc_items(batch, 8, 4); + + *cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2); + *cs++ = 0; + *cs++ = count * 8 * sizeof(*cs); + + /* + * interface descriptor address - it is relative to the dynamics base + * address + */ + *cs++ = interface_descriptor; + batch_advance(batch, cs); +} + +static void +gen7_emit_media_object(struct batch_chunk *batch, + unsigned int media_object_index) +{ + unsigned int x_offset = (media_object_index % 16) * 64; + unsigned int y_offset = (media_object_index / 16) * 16; + unsigned int inline_data_size; + unsigned int media_batch_size; + unsigned int i; + u32 *cs; + + inline_data_size = 112 * 8; + media_batch_size = inline_data_size + 6; + + cs = batch_alloc_items(batch, 8, media_batch_size); + + *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + + /* interface descriptor offset */ + *cs++ = 0; + + /* without indirect data */ + *cs++ = 0; + *cs++ = 0; + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + + /* inline */ + *cs++ = (y_offset << 16) | (x_offset); + *cs++ = 0; + *cs++ = GT3_INLINE_DATA_DELAYS; + for (i = 3; i < inline_data_size; i++) + *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_flush(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 5); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + +static void emit_batch(struct i915_vma * const vma, + u32 *start, + const struct batch_vals *bv) +{ + struct drm_i915_private *i915 = vma->vm->i915; + unsigned int desc_count = 64; + const u32 urb_size = 112; + struct batch_chunk cmds, state; + u32 interface_descriptor; + unsigned int i; + + batch_init(&cmds, vma, start, 0, bv->cmd_size); + batch_init(&state, vma, start, bv->state_start, bv->state_size); + + interface_descriptor = + gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? 
+ &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + gen7_emit_pipeline_flush(&cmds); + batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + batch_add(&cmds, MI_NOOP); + gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_flush(&cmds); + + gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + + gen7_emit_interface_descriptor_load(&cmds, + interface_descriptor, + desc_count); + + for (i = 0; i < bv->max_primitives; i++) + gen7_emit_media_object(&cmds, i); + + batch_add(&cmds, MI_BATCH_BUFFER_END); +} + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + struct batch_vals bv; + u32 *batch; + + batch_get_defaults(engine->i915, &bv); + if (!vma) + return bv.max_size; + + GEM_BUG_ON(vma->obj->base.size < bv.max_size); + + batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h new file mode 100644 index 000000000000..bb100748e2c6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __GEN7_RENDERCLEAR_H__ +#define __GEN7_RENDERCLEAR_H__ + +struct intel_engine_cs; +struct i915_vma; + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma); + +#endif /* __GEN7_RENDERCLEAR_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4d1de2d97d5c..699125928272 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -8,6 +8,7 @@ #include "gen8_ppgtt.h" #include "i915_scatterlist.h" #include "i915_trace.h" +#include "i915_pvinfo.h" #include "i915_vgpu.h" #include "intel_gt.h" #include "intel_gtt.h" @@ -25,6 +26,30 @@ static u64 gen8_pde_encode(const dma_addr_t addr, return pde; } +static u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + + switch (level) { + case I915_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -364,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, return err; } +static __always_inline void +write_pte(gen8_pte_t *pte, const gen8_pte_t val) +{ + /* Magic delays? Or can we refine these to flush all in one pass? 
*/ + *pte = val; + wmb(); /* cpu to cache */ + clflush(pte); /* cache to memory */ + wmb(); /* visible to all */ +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -380,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); - vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + write_pte(&vaddr[gen8_pd_index(idx, 0)], + pte_encode | iter->dma); iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { @@ -462,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, do { GEM_BUG_ON(iter->sg->length < page_size); - vaddr[index++] = encode | iter->dma; + write_pte(&vaddr[index++], encode | iter->dma); start += page_size; iter->dma += page_size; @@ -706,6 +742,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; + ppgtt->vm.pte_encode = gen8_pte_encode; + if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); diff --git a/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c new file mode 100644 index 000000000000..b47f9d4a0848 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + * + * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC + */ + +static const u32 hsw_clear_kernel[] = { + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000040, 0x20280c21, 0x00000028, 0x00000001, + 0x01000010, 0x20000c20, 0x0000002c, 0x00000000, + 0x00010220, 0x34001c00, 0x00001400, 0x00000160, + 0x00600001, 0x20600061, 0x00000000, 0x00000000, + 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c, + 0x00000005, 0x20601ca5, 0x00000060, 0x00000001, + 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d, + 0x00000005, 0x20641ca5, 0x00000064, 0x00000003, + 0x00000041, 0x207424a5, 0x00000064, 0x00000034, + 0x00000040, 0x206014a5, 0x00000060, 0x00000074, + 0x00000008, 0x20681c85, 0x00000e00, 0x00000008, + 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f, + 0x00000041, 0x20701ca5, 0x00000060, 0x00000010, + 0x00000040, 0x206814a5, 0x00000068, 0x00000070, + 0x00600001, 0x20a00061, 0x00000000, 0x00000000, + 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007, + 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004, + 0x00600001, 0x20800021, 0x008d0000, 0x00000000, + 0x00000001, 0x20800021, 0x0000006c, 0x00000000, + 0x00000001, 0x20840021, 0x00000068, 0x00000000, + 0x00000001, 0x20880061, 0x00000000, 0x00000003, + 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001, + 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001, + 0x02000040, 0x20281c21, 0x00000028, 0xffffffff, + 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0, + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000001, 0x220010e4, 0x00000000, 0x00000000, + 0x00000001, 0x220831ec, 0x00000000, 0x007f007f, + 0x00600001, 0x20400021, 0x008d0000, 0x00000000, + 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000, + 0x00200001, 0x20400121, 0x00450020, 0x00000000, + 0x00000001, 0x20480061, 0x00000000, 0x000f000f, + 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef, + 0x00800001, 0x20600061, 0x00000000, 0x00000000, + 0x00800001, 0x20800061, 0x00000000, 0x00000000, + 0x00800001, 0x20a00061, 0x00000000, 0x00000000, + 0x00800001, 0x20c00061, 
0x00000000, 0x00000000, + 0x00800001, 0x20e00061, 0x00000000, 0x00000000, + 0x00800001, 0x21000061, 0x00000000, 0x00000000, + 0x00800001, 0x21200061, 0x00000000, 0x00000000, + 0x00800001, 0x21400061, 0x00000000, 0x00000000, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x00000040, 0x20402d21, 0x00000020, 0x00100010, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff, + 0x00800001, 0xa0000109, 0x00000602, 0x00000000, + 0x00000040, 0x22001c84, 0x00000200, 0x00000020, + 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0, + 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010, +}; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index cbad7fe722ce..d907d538176e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -64,7 +64,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) if (!--b->irq_enabled) irq_disable(engine); - b->irq_armed = false; + WRITE_ONCE(b->irq_armed, false); intel_gt_pm_put_async(engine->gt); } @@ -73,7 +73,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; unsigned long flags; - if (!b->irq_armed) + if (!READ_ONCE(b->irq_armed)) return; spin_lock_irqsave(&b->irq_lock, flags); @@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(engine, tl); } +static void __signal_request(struct i915_request *rq, struct list_head *signals) +{ + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + if (!__dma_fence_signal(&rq->fence)) + return; + + i915_request_get(rq); + list_add_tail(&rq->signal_link, signals); +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); @@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); + list_splice_init(&b->signaled_requests, &signal); + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work) list_entry(pos, typeof(*rq), signal_link); GEM_BUG_ON(!check_signal_order(ce, rq)); - if (!__request_completed(rq)) break; - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, - &rq->fence.flags)); - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - - if (!__dma_fence_signal(&rq->fence)) - continue; - /* * Queue for execution after dropping the signaling * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - i915_request_get(rq); - list_add_tail(&rq->signal_link, &signal); + __signal_request(rq, &signal); } /* @@ -233,7 +238,7 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * which we can add a new waiter and avoid the cost of re-enabling * the irq. 
*/ - b->irq_armed = true; + WRITE_ONCE(b->irq_armed, true); /* * Since we are waiting on a request, the GPU should be busy @@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); + INIT_LIST_HEAD(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); } @@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) spin_unlock_irqrestore(&b->irq_lock, flags); } +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + spin_lock_irqsave(&b->irq_lock, flags); + if (!list_empty(&ce->signals)) { + struct i915_request *rq, *next; + + /* Queue for executing the signal callbacks in the irq_work */ + list_for_each_entry_safe(rq, next, &ce->signals, signal_link) { + GEM_BUG_ON(rq->engine != engine); + GEM_BUG_ON(!__request_completed(rq)); + + __signal_request(rq, &b->signaled_requests); + } + + INIT_LIST_HEAD(&ce->signals); + list_del_init(&ce->signal_link); + + irq_work_queue(&b->irq_work); + } + spin_unlock_irqrestore(&b->irq_lock, flags); +} + void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 8bb444cda14f..74ddb49b2941 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce) return -EINTR; if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + if (intel_context_is_banned(ce)) { + err = -EIO; + goto unlock; + } + err = ce->ops->alloc(ce); if (unlikely(err)) goto unlock; @@ -92,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; + GEM_BUG_ON(intel_context_is_closed(ce)); + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) @@ -107,6 +114,11 @@ int __intel_context_do_pin(struct intel_context *ce) goto out_release; } + if (unlikely(intel_context_is_closed(ce))) { + err = -ENOENT; + goto out_unlock; + } + if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { err = intel_context_active_acquire(ce); if (unlikely(err)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 18efad255124..07be021882cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -173,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline bool intel_context_is_closed(const struct intel_context *ce) +{ + return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); +} + static inline bool intel_context_use_semaphores(const struct intel_context *ce) { return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c new file mode 100644 index 000000000000..65dcd090245d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_param.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_active.h" +#include "intel_context.h" +#include "intel_context_param.h" +#include "intel_ring.h" + +int intel_context_set_ring_size(struct intel_context *ce, long sz) +{ + int err; + + if (intel_context_lock_pinned(ce)) + return -EINTR; + + err = 
i915_active_wait(&ce->active); + if (err < 0) + goto unlock; + + if (intel_context_is_pinned(ce)) { + err = -EBUSY; /* In active use, come back later! */ + goto unlock; + } + + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + struct intel_ring *ring; + + /* Replace the existing ringbuffer */ + ring = intel_engine_create_ring(ce->engine, sz); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto unlock; + } + + intel_ring_put(ce->ring); + ce->ring = ring; + + /* Context image will be updated on next pin */ + } else { + ce->ring = __intel_context_ring_size(sz); + } + +unlock: + intel_context_unlock_pinned(ce); + return err; +} + +long intel_context_get_ring_size(struct intel_context *ce) +{ + long sz = (unsigned long)READ_ONCE(ce->ring); + + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + if (intel_context_lock_pinned(ce)) + return -EINTR; + + sz = ce->ring->size; + intel_context_unlock_pinned(ce); + } + + return sz; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h new file mode 100644 index 000000000000..f053d8633fe2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_CONTEXT_PARAM_H +#define INTEL_CONTEXT_PARAM_H + +struct intel_context; + +int intel_context_set_ring_size(struct intel_context *ce, long sz); +long intel_context_get_ring_size(struct intel_context *ce); + +#endif /* INTEL_CONTEXT_PARAM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 57a30956c922..487299cb91f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -25,8 +25,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, return PTR_ERR(cs); offset = i915_ggtt_offset(ce->state) + - LRC_STATE_PN * PAGE_SIZE + - CTX_R_PWR_CLK_STATE * 4; + LRC_STATE_OFFSET + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 11278343b9b5..4954b0df4864 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -45,8 +45,8 @@ struct intel_context { struct intel_engine_cs *engine; struct intel_engine_cs *inflight; -#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2) -#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) +#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2) +#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2) struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; @@ -62,13 +62,20 @@ struct intel_context { #define CONTEXT_BARRIER_BIT 0 #define CONTEXT_ALLOC_BIT 1 #define CONTEXT_VALID_BIT 2 -#define CONTEXT_USE_SEMAPHORES 3 -#define CONTEXT_BANNED 4 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 -#define CONTEXT_NOPREEMPT 6 +#define CONTEXT_CLOSED_BIT 3 +#define CONTEXT_USE_SEMAPHORES 4 +#define CONTEXT_BANNED 5 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 +#define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; - u64 lrc_desc; + union { + struct { + u32 lrca; + u32 ccid; + }; + u64 desc; + } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ /* Time on GPU as tracked by the hw. 
*/ @@ -95,6 +102,8 @@ struct intel_context { /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; + + u8 wa_bb_page; /* if set, page num reserved for context workarounds */ }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 29c8c03c5caa..9bf6d4989968 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -107,7 +107,20 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - return *READ_ONCE(execlists->active); + struct i915_request * const *cur, * const *old, *active; + + cur = READ_ONCE(execlists->active); + smp_rmb(); /* pairs with overwrite protection in process_csb() */ + do { + old = cur; + + active = READ_ONCE(*cur); + cur = READ_ONCE(execlists->active); + + smp_rmb(); /* and complete the seqlock retry */ + } while (unlikely(cur != old)); + + return active; } static inline void @@ -186,6 +199,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_init(struct intel_gt *gt); +void intel_engine_free_request_pool(struct intel_engine_cs *engine); + void intel_engines_release(struct intel_gt *gt); void intel_engines_free(struct intel_gt *gt); @@ -223,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce); + void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, struct drm_printer *p); -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); - batch[0] = GFX_OP_PIPE_CONTROL(6); - batch[1] = flags; + batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; + batch[1] = flags1; batch[2] = offset; return batch + 6; } +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + return __gen8_emit_pipe_control(batch, 0, flags, offset); +} + +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + return __gen8_emit_pipe_control(batch, flags0, flags1, offset); +} + static inline u32 * -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) { /* We're using qword write, offset should be aligned to 8 bytes. */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); @@ -247,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) * need a prior CS_STALL, which is emitted by the flush * following the batch. 
*/ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; *cs++ = gtt_offset; *cs++ = 0; *cs++ = value; @@ -258,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } +static inline u32* +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags); +} + +static inline u32* +gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1); +} + static inline u32 * gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { @@ -295,9 +335,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -int intel_enable_engine_stats(struct intel_engine_cs *engine); -void intel_disable_engine_stats(struct intel_engine_cs *engine); - ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); struct i915_request * @@ -320,13 +357,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e46e55354e95..da5b61085257 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -31,7 +31,6 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_engine_user.h" #include "intel_gt.h" #include "intel_gt_requests.h" @@ -275,6 +274,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); @@ -301,11 +301,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); - engine->i915 = gt->i915; + engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases); + engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); engine->class = info->class; engine->instance = info->instance; @@ -313,6 +313,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.max_busywait_duration_ns = + CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT; engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = @@ -320,11 +322,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; + /* Override to uninterruptible for OpenCL workloads. 
*/ + if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) + engine->props.preempt_timeout_ms = 0; + + engine->defaults = engine->props; /* never to change again */ + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) - DRIVER_CAPS(gt->i915)->has_logical_contexts = true; + DRIVER_CAPS(i915)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -340,8 +348,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) gt->engine_class[info->class][info->instance] = engine; gt->engine[id] = engine; - gt->i915->engine[id] = engine; - return 0; } @@ -418,17 +424,27 @@ void intel_engines_release(struct intel_gt *gt) engine->release = NULL; memset(&engine->reset, 0, sizeof(engine->reset)); - - gt->i915->engine[id] = NULL; } } +void intel_engine_free_request_pool(struct intel_engine_cs *engine) +{ + if (!engine->request_pool) + return; + + kmem_cache_free(i915_request_slab_cache(), engine->request_pool); +} + void intel_engines_free(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + /* Free the requests! dma-resv keeps fences around for an eternity */ + rcu_barrier(); + for_each_engine(engine, gt, id) { + intel_engine_free_request_pool(engine); kfree(engine); gt->engine[id] = NULL; } @@ -616,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); - /* Use the whole device by default */ engine->sseu = intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); @@ -639,7 +653,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; struct measure_breadcrumb *frame; - int dw = -ENOMEM; + int dw; GEM_BUG_ON(!engine->gt->scratch); @@ -814,12 +828,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); intel_engine_fini_retire(engine); - intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); if (engine->default_state) - i915_gem_object_put(engine->default_state); + fput(engine->default_state); if (engine->kernel_context) { intel_context_unpin(engine->kernel_context); @@ -1218,6 +1231,49 @@ static void print_request(struct drm_printer *m, name); } +static struct intel_timeline *get_timeline(struct i915_request *rq) +{ + struct intel_timeline *tl; + + /* + * Even though we are holding the engine->active.lock here, there + * is no control over the submission queue per-se and we are + * inspecting the active state at a random point in time, with an + * unknown queue. Play safe and make sure the timeline remains valid. + * (Only being used for pretty printing, one extra kref shouldn't + * cause a camel stampede!) + */ + rcu_read_lock(); + tl = rcu_dereference(rq->timeline); + if (!kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + + return tl; +} + +static int print_ring(char *buf, int sz, struct i915_request *rq) +{ + int len = 0; + + if (!i915_request_signaled(rq)) { + struct intel_timeline *tl = get_timeline(rq); + + len = scnprintf(buf, sz, + "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", + i915_ggtt_offset(rq->ring->vma), + tl ? 
tl->hwsp_offset : 0, + hwsp_seqno(rq), + DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), + 1000 * 1000)); + + if (tl) + intel_timeline_put(tl); + } + + return len; +} + static void hexdump(struct drm_printer *m, const void *buf, size_t len) { const size_t rowsize = 8 * sizeof(u32); @@ -1247,27 +1303,6 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } -static struct intel_timeline *get_timeline(struct i915_request *rq) -{ - struct intel_timeline *tl; - - /* - * Even though we are holding the engine->active.lock here, there - * is no control over the submission queue per-se and we are - * inspecting the active state at a random point in time, with an - * unknown queue. Play safe and make sure the timeline remains valid. - * (Only being used for pretty printing, one extra kref shouldn't - * cause a camel stampede!) - */ - rcu_read_lock(); - tl = rcu_dereference(rq->timeline); - if (!kref_get_unless_zero(&tl->kref)) - tl = NULL; - rcu_read_unlock(); - - return tl; -} - static const char *repr_timer(const struct timer_list *t) { if (!READ_ONCE(t->expires)) @@ -1288,6 +1323,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); + if (HAS_EXECLISTS(dev_priv)) { + drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); + drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); + } drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); drm_printf(m, "\tRING_HEAD: 0x%08x\n", @@ -1379,40 +1420,25 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, char hdr[160]; int len; - len = snprintf(hdr, sizeof(hdr), - "\t\tActive[%d]: ", - (int)(port - execlists->active)); - if (!i915_request_signaled(rq)) { - struct intel_timeline *tl = get_timeline(rq); - - len += snprintf(hdr + len, sizeof(hdr) - len, - "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", - i915_ggtt_offset(rq->ring->vma), - tl ? tl->hwsp_offset : 0, - hwsp_seqno(rq), - DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), - 1000 * 1000)); - - if (tl) - intel_timeline_put(tl); - } - snprintf(hdr + len, sizeof(hdr) - len, "rq: "); + len = scnprintf(hdr, sizeof(hdr), + "\t\tActive[%d]: ccid:%08x, ", + (int)(port - execlists->active), + rq->context->lrc.ccid); + len += print_ring(hdr + len, sizeof(hdr) - len, rq); + scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { - struct intel_timeline *tl = get_timeline(rq); - char hdr[80]; - - snprintf(hdr, sizeof(hdr), - "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", - (int)(port - execlists->pending), - i915_ggtt_offset(rq->ring->vma), - tl ? 
tl->hwsp_offset : 0, - hwsp_seqno(rq)); - print_request(m, rq, hdr); + char hdr[160]; + int len; - if (tl) - intel_timeline_put(tl); + len = scnprintf(hdr, sizeof(hdr), + "\t\tPending[%d]: ccid:%08x, ", + (int)(port - execlists->pending), + rq->context->lrc.ccid); + len += print_ring(hdr + len, sizeof(hdr) - len, rq); + scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); + print_request(m, rq, hdr); } rcu_read_unlock(); execlists_active_unlock_bh(execlists); @@ -1561,58 +1587,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -/** - * intel_enable_engine_stats() - Enable engine busy tracking on engine - * @engine: engine to enable stats collection - * - * Start collecting the engine busyness data for @engine. - * - * Returns 0 on success or a negative error code. - */ -int intel_enable_engine_stats(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - unsigned long flags; - int err = 0; - - if (!intel_engine_supports_stats(engine)) - return -ENODEV; - - execlists_active_lock_bh(execlists); - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (unlikely(engine->stats.enabled == ~0)) { - err = -EBUSY; - goto unlock; - } - - if (engine->stats.enabled++ == 0) { - struct i915_request * const *port; - struct i915_request *rq; - - engine->stats.enabled_at = ktime_get(); - - /* XXX submission method oblivious? */ - for (port = execlists->active; (rq = *port); port++) - engine->stats.active++; - - for (port = execlists->pending; (rq = *port); port++) { - /* Exclude any contexts already counted in active */ - if (!intel_context_inflight_count(rq->context)) - engine->stats.active++; - } - - if (engine->stats.active) - engine->stats.start = engine->stats.enabled_at; - } - -unlock: - write_sequnlock_irqrestore(&engine->stats.lock, flags); - execlists_active_unlock_bh(execlists); - - return err; -} - static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) { ktime_t total = engine->stats.total; @@ -1621,7 +1595,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) * If the engine is executing something at the moment * add it to the total. */ - if (engine->stats.active) + if (atomic_read(&engine->stats.active)) total = ktime_add(total, ktime_sub(ktime_get(), engine->stats.start)); @@ -1647,28 +1621,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) return total; } -/** - * intel_disable_engine_stats() - Disable engine busy tracking on engine - * @engine: engine to disable stats collection - * - * Stops collecting the engine busyness data for @engine. 
- */ -void intel_disable_engine_stats(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (!intel_engine_supports_stats(engine)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - WARN_ON_ONCE(engine->stats.enabled == 0); - if (--engine->stats.enabled == 0) { - engine->stats.total = __intel_engine_get_busy_time(engine); - engine->stats.active = 0; - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static bool match_ring(struct i915_request *rq) { u32 ring = ENGINE_READ(rq->engine, RING_START); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index dd825718e4e5..5136c8bf112d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -31,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine) delay = msecs_to_jiffies_timeout(delay); if (delay >= HZ) delay = round_jiffies_up_relative(delay); - schedule_delayed_work(&engine->heartbeat.work, delay); + mod_delayed_work(system_wq, &engine->heartbeat.work, delay); return true; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b6cf284e3a2d..d0a1078ef632 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -10,31 +10,22 @@ #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" #include "intel_ring.h" +#include "shmem_utils.h" static int __engine_unpark(struct intel_wakeref *wf) { struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); struct intel_context *ce; - void *map; ENGINE_TRACE(engine, "\n"); intel_gt_pm_get(engine->gt); - /* Pin the default state for fast resets from atomic context. */ - map = NULL; - if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; - /* Discard stale context state from across idling */ ce = engine->kernel_context; if (ce) { @@ -44,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { struct drm_i915_gem_object *obj = ce->state->obj; int type = i915_coherent_map_type(engine->i915); + void *map; map = i915_gem_object_pin_map(obj, type); if (!IS_ERR(map)) { @@ -181,7 +173,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * Ergo, if we put ourselves on the timelines.active_list * (se intel_timeline_enter()) before we increment the * engine->wakeref.count, we may see the request completion and retire - * it causing an undeflow of the engine->wakeref. + * it causing an underflow of the engine->wakeref. */ flags = __timeline_mark_lock(ce); GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); @@ -255,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); - intel_engine_pool_park(&engine->pool); /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); @@ -263,11 +254,6 @@ static int __engine_park(struct intel_wakeref *wf) if (engine->park) engine->park(engine); - if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); - engine->pinned_default_state = NULL; - } - engine->execlists.no_priolist = false; /* While gt calls i915_vma_parked(), we have to break the lock cycle */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index e52c2b0cb245..418df0a13145 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -37,6 +37,12 @@ static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) intel_wakeref_put_async(&engine->wakeref); } +static inline void intel_engine_pm_put_delay(struct intel_engine_cs *engine, + unsigned long delay) +{ + intel_wakeref_put_delay(&engine->wakeref, delay); +} + static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) { intel_wakeref_unlock_wait(&engine->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h deleted file mode 100644 index 1bd89cadc3b7..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef INTEL_ENGINE_POOL_H -#define INTEL_ENGINE_POOL_H - -#include "intel_engine_pool_types.h" -#include "i915_active.h" -#include "i915_request.h" - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); - -static inline int -intel_engine_pool_mark_active(struct intel_engine_pool_node *node, - struct i915_request *rq) -{ - return i915_active_add_request(&node->active, rq); -} - -static inline void -intel_engine_pool_put(struct intel_engine_pool_node *node) -{ - i915_active_release(&node->active); -} - -void intel_engine_pool_init(struct intel_engine_pool *pool); -void intel_engine_pool_park(struct intel_engine_pool *pool); -void intel_engine_pool_fini(struct intel_engine_pool *pool); - -#endif /* INTEL_ENGINE_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index b23366a81048..2b6cdf47d428 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -22,7 +22,6 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_engine_pool_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_wakeref.h" @@ -157,6 +156,20 @@ struct intel_engine_execlists { struct i915_priolist default_priolist; /** + * @ccid: identifier for contexts submitted to this engine + */ + u32 ccid; + + /** + * @yield: CCID at the time of the last semaphore-wait interrupt. + * + * Instead of leaving a semaphore busy-spinning on an engine, we would + * like to switch to another ready context, i.e. yielding the semaphore + * timeslice. + */ + u32 yield; + + /** * @error_interrupt: CS Master EIR * * The CS generates an interrupt when it detects an error. 
We capture @@ -167,6 +180,11 @@ struct intel_engine_execlists { u32 error_interrupt; /** + * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset + */ + u32 reset_ccid; + + /** * @no_priolist: priority lists disabled */ bool no_priolist; @@ -295,8 +313,7 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; - unsigned int context_tag; -#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) + unsigned long context_tag; struct rb_node uabi_node; @@ -308,6 +325,9 @@ struct intel_engine_cs { struct list_head hold; /* ready requests, but on hold */ } active; + /* keep a request in reserve for a [pm] barrier under oom */ + struct i915_request *request_pool; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ @@ -323,8 +343,7 @@ struct intel_engine_cs { unsigned long wakeref_serial; struct intel_wakeref wakeref; - struct drm_i915_gem_object *default_state; - void *pinned_default_state; + struct file *default_state; struct { struct intel_ring *ring; @@ -358,6 +377,8 @@ struct intel_engine_cs { spinlock_t irq_lock; struct list_head signalers; + struct list_head signaled_requests; + struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; @@ -389,13 +410,6 @@ struct intel_engine_cs { struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. - */ - struct intel_engine_pool pool; - struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; struct i915_wa_list ctx_wa_list; @@ -407,6 +421,7 @@ struct intel_engine_cs { void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); + void (*sanitize)(struct intel_engine_cs *engine); int (*resume)(struct intel_engine_cs *engine); struct { @@ -483,10 +498,11 @@ struct intel_engine_cs { #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) -#define I915_ENGINE_IS_VIRTUAL BIT(5) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_HAS_TIMESLICES BIT(4) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) +#define I915_ENGINE_IS_VIRTUAL BIT(6) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) unsigned int flags; /* @@ -515,42 +531,43 @@ struct intel_engine_cs { struct { /** - * @lock: Lock protecting the below fields. - */ - seqlock_t lock; - /** - * @enabled: Reference count indicating number of listeners. + * @active: Number of contexts currently scheduled in. */ - unsigned int enabled; + atomic_t active; + /** - * @active: Number of contexts currently scheduled in. + * @lock: Lock protecting the below fields. */ - unsigned int active; + seqlock_t lock; + /** - * @enabled_at: Timestamp when busy stats were enabled. + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). */ - ktime_t enabled_at; + ktime_t total; + /** * @start: Timestamp of the last idle to active transition. * * Idle is defined as active == 0, active is active > 0. */ ktime_t start; + /** - * @total: Total time this engine was busy. 
- * - * Accumulated time not counting the most recent block in cases - * where engine is currently busy (active > 0). + * @rps: Utilisation at last RPS sampling. */ - ktime_t total; + ktime_t rps; } stats; struct { unsigned long heartbeat_interval_ms; + unsigned long max_busywait_duration_ns; unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; - } props; + } props, defaults; }; static inline bool @@ -584,6 +601,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) } static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return engine->flags & I915_ENGINE_HAS_TIMESLICES; +} + +static inline bool intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 7dae91e0d002..66165b10256e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -8,6 +8,8 @@ #include <asm/set_memory.h> #include <asm/smp.h> +#include <drm/i915_drm.h> + #include "intel_gt.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -63,7 +65,7 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) ggtt->mappable_end); } - i915_ggtt_init_fences(ggtt); + intel_ggtt_init_fences(ggtt); return 0; } @@ -157,6 +159,13 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) intel_gtt_chipset_flush(); } +static u64 gen8_ggtt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + return addr | _PAGE_PRESENT; +} + static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { writeq(pte, addr); @@ -172,7 +181,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); + gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0)); ggtt->invalidate(ggtt); } @@ -182,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; dma_addr_t addr; /* @@ -193,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. 
*/ - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma->pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0].encode); /* * We want to flush the TLBs only after we're certain all the PTE @@ -232,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; struct sgt_iter iter; dma_addr_t addr; + gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0].encode, gte++); /* * We want to flush the TLBs only after we're certain all the PTE @@ -427,7 +453,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) u64 size; int ret; - if (!USES_GUC(ggtt->vm.i915)) + if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); @@ -689,11 +715,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) */ void i915_ggtt_driver_release(struct drm_i915_private *i915) { + struct i915_ggtt *ggtt = &i915->ggtt; struct pagevec *pvec; - fini_aliasing_ppgtt(&i915->ggtt); + fini_aliasing_ppgtt(ggtt); - ggtt_cleanup_hw(&i915->ggtt); + intel_ggtt_fini_fences(ggtt); + ggtt_cleanup_hw(ggtt); pvec = &i915->mm.wc_stash.pvec; if (pvec->nr) { @@ -754,17 +782,17 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * readback check when writing GTT PTE entries. 
*/ if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) - ggtt->gsm = ioremap_nocache(phys_addr, size); + ggtt->gsm = ioremap(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); if (!ggtt->gsm) { - DRM_ERROR("Failed to map the ggtt page table\n"); + drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; } ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); if (ret) { - DRM_ERROR("Scratch setup failed\n"); + drm_err(&i915->drm, "Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ iounmap(ggtt->gsm); return ret; @@ -812,7 +840,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = i915->drm.pdev; unsigned int size; u16 snb_gmch_ctl; - int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ if (!IS_DGFX(i915)) { @@ -820,12 +847,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); if (IS_CHERRYVIEW(i915)) size = chv_get_total_gtt_size(snb_gmch_ctl); @@ -857,7 +878,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - ggtt->vm.pte_encode = gen8_pte_encode; + ggtt->vm.pte_encode = gen8_ggtt_pte_encode; setup_private_pat(ggtt->vm.gt->uncore); @@ -961,7 +982,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = i915->drm.pdev; unsigned int size; u16 snb_gmch_ctl; - int err; ggtt->gmadr = pci_resource(pdev, 2); ggtt->mappable_end = resource_size(&ggtt->gmadr); @@ -971,15 +991,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) * just a coarse sanity check. */ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); + drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", + &ggtt->mappable_end); return -ENXIO; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); - if (!err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); - if (err) - DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); size = gen6_get_total_gtt_size(snb_gmch_ctl); @@ -1026,7 +1042,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL); if (!ret) { - DRM_ERROR("failed to set up gmch\n"); + drm_err(&i915->drm, "failed to set up gmch\n"); return -EIO; } @@ -1049,7 +1065,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.clear_pages = clear_pages; if (unlikely(ggtt->do_idle_maps)) - dev_notice(i915->drm.dev, + drm_notice(&i915->drm, "Applying Ironlake quirks for intel_iommu\n"); return 0; @@ -1074,26 +1090,29 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) return ret; if ((ggtt->vm.total - 1) >> 32) { - DRM_ERROR("We never expected a Global GTT with more than 32bits" - " of address space! Found %lldM!\n", - ggtt->vm.total >> 20); + drm_err(&i915->drm, + "We never expected a Global GTT with more than 32bits" + " of address space! 
Found %lldM!\n", + ggtt->vm.total >> 20); ggtt->vm.total = 1ULL << 32; ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->vm.total); } if (ggtt->mappable_end > ggtt->vm.total) { - DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%pa, total=%llx\n", - &ggtt->mappable_end, ggtt->vm.total); + drm_err(&i915->drm, + "mappable aperture extends past end of GGTT," + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->vm.total); ggtt->mappable_end = ggtt->vm.total; } /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("DSM size = %lluM\n", - (u64)resource_size(&intel_graphics_stolen_res) >> 20); + drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20); + drm_dbg(&i915->drm, "GMADR size = %lluM\n", + (u64)ggtt->mappable_end >> 20); + drm_dbg(&i915->drm, "DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); return 0; } @@ -1111,7 +1130,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) return ret; if (intel_vtd_active()) - dev_info(i915->drm.dev, "VT-d active for gfx access\n"); + drm_info(&i915->drm, "VT-d active for gfx access\n"); return 0; } @@ -1186,6 +1205,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) if (INTEL_GEN(ggtt->vm.i915) >= 8) setup_private_pat(ggtt->vm.gt->uncore); + + intel_ggtt_restore_fences(ggtt); } static struct scatterlist * diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c new file mode 100644 index 000000000000..7fb36b12fe7a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -0,0 +1,909 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "i915_drv.h" +#include "i915_scatterlist.h" +#include "i915_pvinfo.h" +#include "i915_vgpu.h" + +/** + * DOC: fence register handling + * + * Important to avoid confusions: "fences" in the i915 driver are not execution + * fences used to track command completion but hardware detiler objects which + * wrap a given range of the global GTT. Each platform has only a fairly limited + * set of these objects. + * + * Fences are used to detile GTT memory mappings. They're also connected to the + * hardware frontbuffer render tracking and hence interact with frontbuffer + * compression. 
Furthermore on older platforms fences are required for tiled + * objects used by the display engine. They can also be used by the render + * engine - they're required for blitter commands and are optional for render + * commands. But on gen4+ both display (with the exception of fbc) and rendering + * have their own tiling state bits and don't need fences. + * + * Also note that fences only support X and Y tiling and hence can't be used for + * the fancier new tiling formats like W, Ys and Yf. + * + * Finally note that because fences are such a restricted resource they're + * dynamically associated with objects. Furthermore fence state is committed to + * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must + * explicitly call i915_gem_object_get_fence() to synchronize fencing status + * for cpu access. Also note that some code wants an unfenced view, for those + * cases the fence can be removed forcefully with i915_gem_object_put_fence(). + * + * Internally these functions will synchronize with userspace access by removing + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. + */ + +#define pipelined 0 + +static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.i915; +} + +static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.gt->uncore; +} + +static void i965_write_fence_reg(struct i915_fence_reg *fence) +{ + i915_reg_t fence_reg_lo, fence_reg_hi; + int fence_pitch_shift; + u64 val; + + if (INTEL_GEN(fence_to_i915(fence)) >= 6) { + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); + fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; + + } else { + fence_reg_lo = FENCE_REG_965_LO(fence->id); + fence_reg_hi = FENCE_REG_965_HI(fence->id); + fence_pitch_shift = I965_FENCE_PITCH_SHIFT; + } + + val = 0; + if (fence->tiling) { + unsigned int stride = fence->stride; + + GEM_BUG_ON(!IS_ALIGNED(stride, 128)); + + val = fence->start + fence->size - I965_FENCE_PAGE; + val <<= 32; + val |= fence->start; + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; + if (fence->tiling == I915_TILING_Y) + val |= BIT(I965_FENCE_TILING_Y_SHIFT); + val |= I965_FENCE_REG_VALID; + } + + if (!pipelined) { + struct intel_uncore *uncore = fence_to_uncore(fence); + + /* + * To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. 
+ */ + intel_uncore_write_fw(uncore, fence_reg_lo, 0); + intel_uncore_posting_read_fw(uncore, fence_reg_lo); + + intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val)); + intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val)); + intel_uncore_posting_read_fw(uncore, fence_reg_lo); + } +} + +static void i915_write_fence_reg(struct i915_fence_reg *fence) +{ + u32 val; + + val = 0; + if (fence->tiling) { + unsigned int stride = fence->stride; + unsigned int tiling = fence->tiling; + bool is_y_tiled = tiling == I915_TILING_Y; + + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence))) + stride /= 128; + else + stride /= 512; + GEM_BUG_ON(!is_power_of_2(stride)); + + val = fence->start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I915_FENCE_SIZE_BITS(fence->size); + val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT; + + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct intel_uncore *uncore = fence_to_uncore(fence); + i915_reg_t reg = FENCE_REG(fence->id); + + intel_uncore_write_fw(uncore, reg, val); + intel_uncore_posting_read_fw(uncore, reg); + } +} + +static void i830_write_fence_reg(struct i915_fence_reg *fence) +{ + u32 val; + + val = 0; + if (fence->tiling) { + unsigned int stride = fence->stride; + + val = fence->start; + if (fence->tiling == I915_TILING_Y) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I830_FENCE_SIZE_BITS(fence->size); + val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct intel_uncore *uncore = fence_to_uncore(fence); + i915_reg_t reg = FENCE_REG(fence->id); + + intel_uncore_write_fw(uncore, reg, val); + intel_uncore_posting_read_fw(uncore, reg); + } +} + +static void fence_write(struct i915_fence_reg *fence) +{ + struct drm_i915_private *i915 = fence_to_i915(fence); + + /* + * Previous access through the fence register is marshalled by + * the mb() inside the fault handlers (i915_gem_release_mmaps) + * and explicitly managed for internal users. + */ + + if (IS_GEN(i915, 2)) + i830_write_fence_reg(fence); + else if (IS_GEN(i915, 3)) + i915_write_fence_reg(fence); + else + i965_write_fence_reg(fence); + + /* + * Access through the fenced region afterwards is + * ordered by the posting reads whilst writing the registers. 
+ */ +} + +static bool gpu_uses_fence_registers(struct i915_fence_reg *fence) +{ + return INTEL_GEN(fence_to_i915(fence)) < 4; +} + +static int fence_update(struct i915_fence_reg *fence, + struct i915_vma *vma) +{ + struct i915_ggtt *ggtt = fence->ggtt; + struct intel_uncore *uncore = fence_to_uncore(fence); + intel_wakeref_t wakeref; + struct i915_vma *old; + int ret; + + fence->tiling = 0; + if (vma) { + GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) || + !i915_gem_object_get_tiling(vma->obj)); + + if (!i915_vma_is_map_and_fenceable(vma)) + return -EINVAL; + + if (gpu_uses_fence_registers(fence)) { + /* implicit 'unfenced' GPU blits */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + } + + fence->start = vma->node.start; + fence->size = vma->fence_size; + fence->stride = i915_gem_object_get_stride(vma->obj); + fence->tiling = i915_gem_object_get_tiling(vma->obj); + } + WRITE_ONCE(fence->dirty, false); + + old = xchg(&fence->vma, NULL); + if (old) { + /* XXX Ideally we would move the waiting to outside the mutex */ + ret = i915_active_wait(&fence->active); + if (ret) { + fence->vma = old; + return ret; + } + + i915_vma_flush_writes(old); + + /* + * Ensure that all userspace CPU access is completed before + * stealing the fence. + */ + if (old != vma) { + GEM_BUG_ON(old->fence != fence); + i915_vma_revoke_mmap(old); + old->fence = NULL; + } + + list_move(&fence->link, &ggtt->fence_list); + } + + /* + * We only need to update the register itself if the device is awake. + * If the device is currently powered down, we will defer the write + * to the runtime resume, see intel_ggtt_restore_fences(). + * + * This only works for removing the fence register, on acquisition + * the caller must hold the rpm wakeref. The fence register must + * be cleared before we can use any other fences to ensure that + * the new fences do not overlap the elided clears, confusing HW. + */ + wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); + if (!wakeref) { + GEM_BUG_ON(vma); + return 0; + } + + WRITE_ONCE(fence->vma, vma); + fence_write(fence); + + if (vma) { + vma->fence = fence; + list_move_tail(&fence->link, &ggtt->fence_list); + } + + intel_runtime_pm_put(uncore->rpm, wakeref); + return 0; +} + +/** + * i915_vma_revoke_fence - force-remove fence for a VMA + * @vma: vma to map linearly (not through a fence reg) + * + * This function force-removes any fence from the given object, which is useful + * if the kernel wants to do untiled GTT access. 
+ */ +void i915_vma_revoke_fence(struct i915_vma *vma) +{ + struct i915_fence_reg *fence = vma->fence; + intel_wakeref_t wakeref; + + lockdep_assert_held(&vma->vm->mutex); + if (!fence) + return; + + GEM_BUG_ON(fence->vma != vma); + GEM_BUG_ON(!i915_active_is_idle(&fence->active)); + GEM_BUG_ON(atomic_read(&fence->pin_count)); + + fence->tiling = 0; + WRITE_ONCE(fence->vma, NULL); + vma->fence = NULL; + + with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref) + fence_write(fence); +} + +static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) +{ + struct i915_fence_reg *fence; + + list_for_each_entry(fence, &ggtt->fence_list, link) { + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + + if (atomic_read(&fence->pin_count)) + continue; + + return fence; + } + + /* Wait for completion of pending flips which consume fences */ + if (intel_has_pending_fb_unpin(ggtt->vm.i915)) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-EDEADLK); +} + +int __i915_vma_pin_fence(struct i915_vma *vma) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + struct i915_fence_reg *fence; + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + int err; + + lockdep_assert_held(&vma->vm->mutex); + + /* Just update our place in the LRU if our fence is getting reused. */ + if (vma->fence) { + fence = vma->fence; + GEM_BUG_ON(fence->vma != vma); + atomic_inc(&fence->pin_count); + if (!fence->dirty) { + list_move_tail(&fence->link, &ggtt->fence_list); + return 0; + } + } else if (set) { + fence = fence_find(ggtt); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + GEM_BUG_ON(atomic_read(&fence->pin_count)); + atomic_inc(&fence->pin_count); + } else { + return 0; + } + + err = fence_update(fence, set); + if (err) + goto out_unpin; + + GEM_BUG_ON(fence->vma != set); + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); + + if (set) + return 0; + +out_unpin: + atomic_dec(&fence->pin_count); + return err; +} + +/** + * i915_vma_pin_fence - set up fencing for a vma + * @vma: vma to map through a fence reg + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * This function walks the fence regs looking for a free one for @obj, + * stealing one if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. + * + * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ +int i915_vma_pin_fence(struct i915_vma *vma) +{ + int err; + + if (!vma->fence && !i915_gem_object_is_tiled(vma->obj)) + return 0; + + /* + * Note that we revoke fences on runtime suspend. Therefore the user + * must keep the device awake whilst using the fence. + */ + assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + return err; + + err = __i915_vma_pin_fence(vma); + mutex_unlock(&vma->vm->mutex); + + return err; +} + +/** + * i915_reserve_fence - Reserve a fence for vGPU + * @ggtt: Global GTT + * + * This function walks the fence regs looking for a free one and remove + * it from the fence_list. It is used to reserve fence for vGPU to use. 
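A short usage sketch of the fence-pinning API in this hunk: a pinned, fenceable GGTT vma takes a fence register for detiled access and releases it afterwards. The runtime-pm and pinning preconditions come straight from the i915_vma_pin_fence() kernel-doc above; i915_vma_unpin_fence() is assumed from elsewhere in the driver (it is not part of this file):

	/*
	 * Illustrative only. Per the kernel-doc above, the caller must hold a
	 * runtime-pm wakeref and the vma must already be pinned in the GGTT.
	 * i915_vma_unpin_fence() is assumed from i915_vma.h; only the pin
	 * side appears in this hunk.
	 */
	static int example_fenced_gtt_access(struct i915_vma *vma)
	{
		int err;

		err = i915_vma_pin_fence(vma);
		if (err)
			return err;

		/* ... detiled access through the mappable aperture ... */

		if (vma->fence)
			i915_vma_unpin_fence(vma);

		return 0;
	}
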
+ */ +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt) +{ + struct i915_fence_reg *fence; + int count; + int ret; + + lockdep_assert_held(&ggtt->vm.mutex); + + /* Keep at least one fence available for the display engine. */ + count = 0; + list_for_each_entry(fence, &ggtt->fence_list, link) + count += !atomic_read(&fence->pin_count); + if (count <= 1) + return ERR_PTR(-ENOSPC); + + fence = fence_find(ggtt); + if (IS_ERR(fence)) + return fence; + + if (fence->vma) { + /* Force-remove fence from VMA */ + ret = fence_update(fence, NULL); + if (ret) + return ERR_PTR(ret); + } + + list_del(&fence->link); + + return fence; +} + +/** + * i915_unreserve_fence - Reclaim a reserved fence + * @fence: the fence reg + * + * This function add a reserved fence register from vGPU to the fence_list. + */ +void i915_unreserve_fence(struct i915_fence_reg *fence) +{ + struct i915_ggtt *ggtt = fence->ggtt; + + lockdep_assert_held(&ggtt->vm.mutex); + + list_add(&fence->link, &ggtt->fence_list); +} + +/** + * intel_ggtt_restore_fences - restore fence state + * @ggtt: Global GTT + * + * Restore the hw fence state to match the software tracking again, to be called + * after a gpu reset and on resume. Note that on runtime suspend we only cancel + * the fences, to be reacquired by the user later. + */ +void intel_ggtt_restore_fences(struct i915_ggtt *ggtt) +{ + int i; + + for (i = 0; i < ggtt->num_fences; i++) + fence_write(&ggtt->fence_regs[i]); +} + +/** + * DOC: tiling swizzling details + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. + * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. + * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an object out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! 
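To make the bit-6 swizzling above concrete before the DOC text continues: in the two modes most often selected by the detection code further down, bit 6 of an address is XORed with bit 9, or with bits 9 and 10. A small illustrative helper, not part of this patch, that applies that fixup to a linear offset:

	/* Sketch: apply GPU bit-6 swizzle to a linear offset for CPU access. */
	static u64 example_apply_bit6_swizzle(u64 offset, u32 swizzle)
	{
		switch (swizzle) {
		case I915_BIT_6_SWIZZLE_9:
			return offset ^ (((offset >> 9) & 1) << 6);
		case I915_BIT_6_SWIZZLE_9_10:
			return offset ^ ((((offset >> 9) ^ (offset >> 10)) & 1) << 6);
		default:
			return offset;	/* NONE / UNKNOWN: leave untouched */
		}
	}
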
+ * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. + */ + +/** + * detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @ggtt: Global GGTT + * + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) +{ + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; + u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + + if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) { + /* + * On BDW+, swizzling is not used. We leave the CPU memory + * controller in charge of optimizing memory accesses without + * the extra address manipulation GPU side. + * + * VLV and CHV don't have GPU swizzling. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (INTEL_GEN(i915) >= 6) { + if (i915->preserve_bios_swizzle) { + if (intel_uncore_read(uncore, DISP_ARB_CTL) & + DISP_TILE_SURFACE_SWIZZLING) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } else { + u32 dimm_c0, dimm_c1; + dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0); + dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1); + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + /* + * Enable swizzling when the channels are populated + * with identically sized dimms. We don't need to check + * the 3rd channel because no cpu with gpu attached + * ships in that configuration. Also, swizzling only + * makes sense for 2 channels anyway. + */ + if (dimm_c0 == dimm_c1) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } + } else if (IS_GEN(i915, 5)) { + /* + * On Ironlake whatever DRAM config, GPU always do + * same swizzling setup. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (IS_GEN(i915, 2)) { + /* + * As far as we know, the 865 doesn't have these bit 6 + * swizzling issues. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) { + /* + * The 965, G33, and newer, have a very flexible memory + * configuration. It will enable dual-channel mode + * (interleaving) on as much memory as it can, and the GPU + * will additionally sometimes enable different bit 6 + * swizzling for tiled objects from the CPU. + * + * Here's what I found on the G965: + * slot fill memory size swizzling + * 0A 0B 1A 1B 1-ch 2-ch + * 512 0 0 0 512 0 O + * 512 0 512 0 16 1008 X + * 512 0 0 512 16 1008 X + * 0 512 0 512 16 1008 X + * 1024 1024 1024 0 2048 1024 O + * + * We could probably detect this based on either the DRB + * matching, which was the case for the swizzling required in + * the table above, or from the 1-ch value being less than + * the minimum size of a rank. 
+ * + * Reports indicate that the swizzling actually + * varies depending upon page placement inside the + * channels, i.e. we see swizzled pages where the + * banks of memory are paired and unswizzled on the + * uneven portion, so leave that as unknown. + */ + if (intel_uncore_read(uncore, C0DRB3) == + intel_uncore_read(uncore, C1DRB3)) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } else { + u32 dcc = intel_uncore_read(uncore, DCC); + + /* + * On 9xx chipsets, channel interleave by the CPU is + * determined by DCC. For single-channel, neither the CPU + * nor the GPU do swizzling. For dual channel interleaved, + * the GPU's interleave is bit 9 and 10 for X tiled, and bit + * 9 for Y tiled. The CPU's interleave is independent, and + * can be based on either bit 11 (haven't seen this yet) or + * bit 17 (common). + */ + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (dcc & DCC_CHANNEL_XOR_DISABLE) { + /* + * This is the base swizzling by the GPU for + * tiled buffers. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { + /* Bit 11 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; + } + break; + } + + /* check for L-shaped memory aka modified enhanced addressing */ + if (IS_GEN(i915, 4) && + !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + + if (dcc == 0xffffffff) { + drm_err(&i915->drm, "Couldn't read from MCHBAR. " + "Disabling tiling.\n"); + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + } + + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { + /* + * Userspace likes to explode if it sees unknown swizzling, + * so lie. We will finish the lie when reporting through + * the get-tiling-ioctl by reporting the physical swizzle + * mode as unknown instead. + * + * As we don't strictly know what the swizzling is, it may be + * bit17 dependent, and so we need to also prevent the pages + * from being moved. + */ + i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + + i915->ggtt.bit_6_swizzle_x = swizzle_x; + i915->ggtt.bit_6_swizzle_y = swizzle_y; +} + +/* + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. 
+ */ +static void swizzle_page(struct page *page) +{ + char temp[64]; + char *vaddr; + int i; + + vaddr = kmap(page); + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); +} + +/** + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function fixes up the swizzling in case any page frame number for this + * object has changed in bit 17 since that state has been saved with + * i915_gem_object_save_bit_17_swizzle(). + * + * This is called when pinning backing storage again, since the kernel is free + * to move unpinned backing storage around (either by directly moving pages or + * by swapping them out and back in again). + */ +void +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) + return; + + i = 0; + for_each_sgt_page(page, sgt_iter, pages) { + char new_bit_17 = page_to_phys(page) >> 17; + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { + swizzle_page(page); + set_page_dirty(page); + } + i++; + } +} + +/** + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function saves the bit 17 of each page frame number so that swizzling + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must + * be called before the backing storage can be unpinned. + */ +void +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + const unsigned int page_count = obj->base.size >> PAGE_SHIFT; + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) { + obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL); + if (obj->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + i = 0; + + for_each_sgt_page(page, sgt_iter, pages) { + if (page_to_phys(page) & (1 << 17)) + __set_bit(i, obj->bit_17); + else + __clear_bit(i, obj->bit_17); + i++; + } +} + +void intel_ggtt_init_fences(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + int num_fences; + int i; + + INIT_LIST_HEAD(&ggtt->fence_list); + INIT_LIST_HEAD(&ggtt->userfault_list); + intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); + + detect_bit_6_swizzle(ggtt); + + if (!i915_ggtt_has_aperture(ggtt)) + num_fences = 0; + else if (INTEL_GEN(i915) >= 7 && + !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) + num_fences = 32; + else if (INTEL_GEN(i915) >= 4 || + IS_I945G(i915) || IS_I945GM(i915) || + IS_G33(i915) || IS_PINEVIEW(i915)) + num_fences = 16; + else + num_fences = 8; + + if (intel_vgpu_active(i915)) + num_fences = intel_uncore_read(uncore, + vgtif_reg(avail_rs.fence_num)); + ggtt->fence_regs = kcalloc(num_fences, + sizeof(*ggtt->fence_regs), + GFP_KERNEL); + if (!ggtt->fence_regs) + num_fences = 0; + + /* Initialize fence registers to zero */ + for (i = 0; i < num_fences; i++) { + struct i915_fence_reg *fence = &ggtt->fence_regs[i]; + + i915_active_init(&fence->active, NULL, NULL); + fence->ggtt = ggtt; + fence->id = i; + list_add_tail(&fence->link, &ggtt->fence_list); + } + ggtt->num_fences = num_fences; + + intel_ggtt_restore_fences(ggtt); +} + +void 
intel_ggtt_fini_fences(struct i915_ggtt *ggtt) +{ + int i; + + for (i = 0; i < ggtt->num_fences; i++) { + struct i915_fence_reg *fence = &ggtt->fence_regs[i]; + + i915_active_fini(&fence->active); + } + + kfree(ggtt->fence_regs); +} + +void intel_gt_init_swizzling(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + + if (INTEL_GEN(i915) < 5 || + i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + return; + + intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); + + if (IS_GEN(i915, 5)) + return; + + intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL); + + if (IS_GEN(i915, 6)) + intel_uncore_write(uncore, + ARB_MODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); + else if (IS_GEN(i915, 7)) + intel_uncore_write(uncore, + ARB_MODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); + else if (IS_GEN(i915, 8)) + intel_uncore_write(uncore, + GAMTARBMODE, + _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); + else + MISSING_CASE(INTEL_GEN(i915)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h new file mode 100644 index 000000000000..9eef679e1311 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __INTEL_GGTT_FENCING_H__ +#define __INTEL_GGTT_FENCING_H__ + +#include <linux/list.h> +#include <linux/types.h> + +#include "i915_active.h" + +struct drm_i915_gem_object; +struct i915_ggtt; +struct i915_vma; +struct intel_gt; +struct sg_table; + +#define I965_FENCE_PAGE 4096UL + +struct i915_fence_reg { + struct list_head link; + struct i915_ggtt *ggtt; + struct i915_vma *vma; + atomic_t pin_count; + struct i915_active active; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". 
+ */ + bool dirty; + u32 start; + u32 size; + u32 tiling; + u32 stride; +}; + +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt); +void i915_unreserve_fence(struct i915_fence_reg *fence); + +void intel_ggtt_restore_fences(struct i915_ggtt *ggtt); + +void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages); +void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void intel_ggtt_init_fences(struct i915_ggtt *ggtt); +void intel_ggtt_fini_fences(struct i915_ggtt *ggtt); + +void intel_gt_init_swizzling(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 51b8718513bc..534e435f20bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,7 +138,7 @@ */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ -#define MI_LRI_CS_MMIO (1<<19) +#define MI_LRI_LRM_CS_MMIO REG_BIT(19) #define MI_LRI_FORCE_POSTED (1<<12) #define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) @@ -156,6 +156,7 @@ #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) #define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) +#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -235,9 +236,8 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) @@ -292,10 +292,21 @@ #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) #define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define STATE_BASE_ADDRESS \ + ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16)) +#define BASE_ADDRESS_MODIFY REG_BIT(0) +#define PIPELINE_SELECT \ + ((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16)) +#define PIPELINE_SELECT_MEDIA REG_BIT(0) +#define GFX_OP_3DSTATE_VF_STATISTICS \ + ((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16)) +#define MEDIA_VFE_STATE \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16)) #define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16)) +#define MEDIA_OBJECT \ + ((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16)) #define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) #define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f1f1b306e0af..f069551e412f 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,8 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_gt_buffer_pool.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_mocs.h" @@ -15,6 +17,7 @@ #include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" +#include "shmem_utils.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -26,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); + intel_gt_init_buffer_pool(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); @@ -370,18 +374,6 @@ static struct i915_address_space *kernel_vm(struct intel_gt *gt) return i915_vm_get(>->ggtt->vm); } -static int __intel_context_flush_retire(struct intel_context *ce) -{ - struct intel_timeline *tl; - - tl = intel_context_timeline_lock(ce); - if (IS_ERR(tl)) - return PTR_ERR(tl); - - intel_context_timeline_unlock(tl); - return 0; -} - static int __engines_record_defaults(struct intel_gt *gt) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -447,8 +439,7 @@ err_rq: for (id = 0; id < ARRAY_SIZE(requests); id++) { struct i915_request *rq; - struct i915_vma *state; - void *vaddr; + struct file *state; rq = requests[id]; if (!rq) @@ -460,48 +451,16 @@ err_rq: } GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); - state = rq->context->state; - if (!state) + if (!rq->context->state) continue; - /* Serialise with retirement on another CPU */ - GEM_BUG_ON(!i915_request_completed(rq)); - err = __intel_context_flush_retire(rq->context); - if (err) - goto out; - - /* We want to be able to unbind the state from the GGTT */ - GEM_BUG_ON(intel_context_is_pinned(rq->context)); - - /* - * As we will hold a reference to the logical state, it will - * not be torn down with the context, and importantly the - * object will hold onto its vma (making it possible for a - * stray GTT write to corrupt our defaults). Unmap the vma - * from the GTT to prevent such accidents and reclaim the - * space. - */ - err = i915_vma_unbind(state); - if (err) - goto out; - - i915_gem_object_lock(state->obj); - err = i915_gem_object_set_to_cpu_domain(state->obj, false); - i915_gem_object_unlock(state->obj); - if (err) - goto out; - - i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); - - /* Check we can acquire the image of the context state */ - vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); + /* Keep a copy of the state's backing pages; free the obj */ + state = shmem_create_from_object(rq->context->state->obj); + if (IS_ERR(state)) { + err = PTR_ERR(state); goto out; } - - rq->engine->default_state = i915_gem_object_get(state->obj); - i915_gem_object_unpin_map(state->obj); + rq->engine->default_state = state; } out: @@ -576,6 +535,8 @@ int intel_gt_init(struct intel_gt *gt) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_gt_init_clock_frequency(gt); + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? 
SZ_256K : SZ_4K); if (err) goto out_fw; @@ -592,7 +553,9 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_engines; - intel_uc_init(>->uc); + err = intel_uc_init(>->uc); + if (err) + goto err_engines; err = intel_gt_resume(gt); if (err) @@ -633,8 +596,7 @@ void intel_gt_driver_remove(struct intel_gt *gt) { __intel_gt_disable(gt); - intel_uc_fini_hw(>->uc); - intel_uc_fini(>->uc); + intel_uc_driver_remove(>->uc); intel_engines_release(gt); } @@ -642,6 +604,13 @@ void intel_gt_driver_remove(struct intel_gt *gt) void intel_gt_driver_unregister(struct intel_gt *gt) { intel_rps_driver_unregister(>->rps); + + /* + * Upon unregistering the device to prevent any new users, cancel + * all in-flight requests so that we can quickly unbind the active + * resources. + */ + intel_gt_set_wedged(gt); } void intel_gt_driver_release(struct intel_gt *gt) @@ -654,10 +623,14 @@ void intel_gt_driver_release(struct intel_gt *gt) intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); + intel_gt_fini_buffer_pool(gt); } void intel_gt_driver_late_release(struct intel_gt *gt) { + /* We need to wait for inflight RCU frees to release their grip */ + rcu_barrier(); + intel_uc_driver_late_release(>->uc); intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 397186818305..1495054a4305 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -8,15 +7,15 @@ #include "i915_drv.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" +#include "intel_gt_buffer_pool.h" -static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool) +static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool) { - return container_of(pool, struct intel_engine_cs, pool); + return container_of(pool, struct intel_gt, buffer_pool); } static struct list_head * -bucket_for_size(struct intel_engine_pool *pool, size_t sz) +bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz) { int n; @@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz) return &pool->cache_list[n]; } -static void node_free(struct intel_engine_pool_node *node) +static void node_free(struct intel_gt_buffer_pool_node *node) { i915_gem_object_put(node->obj); i915_active_fini(&node->active); kfree(node); } +static void pool_free_work(struct work_struct *wrk) +{ + struct intel_gt_buffer_pool *pool = + container_of(wrk, typeof(*pool), work.work); + struct intel_gt_buffer_pool_node *node, *next; + unsigned long old = jiffies - HZ; + bool active = false; + LIST_HEAD(stale); + int n; + + /* Free buffers that have not been used in the past second */ + spin_lock_irq(&pool->lock); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct list_head *list = &pool->cache_list[n]; + + /* Most recent at head; oldest at tail */ + list_for_each_entry_safe_reverse(node, next, list, link) { + if (time_before(node->age, old)) + break; + + list_move(&node->link, &stale); + } + active |= !list_empty(list); + } + spin_unlock_irq(&pool->lock); + + list_for_each_entry_safe(node, next, &stale, link) + node_free(node); + + if (active) + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); +} + static int pool_active(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = 
container_of(ref, typeof(*node), active); struct dma_resv *resv = node->obj->base.resv; int err; @@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref) __i915_active_call static void pool_retire(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); - struct intel_engine_pool *pool = node->pool; + struct intel_gt_buffer_pool *pool = node->pool; struct list_head *list = bucket_for_size(pool, node->obj->base.size); unsigned long flags; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - i915_gem_object_unpin_pages(node->obj); /* Return this object to the shrinker pool */ i915_gem_object_make_purgeable(node->obj); spin_lock_irqsave(&pool->lock, flags); + node->age = jiffies; list_add(&node->link, list); spin_unlock_irqrestore(&pool->lock, flags); + + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); } -static struct intel_engine_pool_node * -node_create(struct intel_engine_pool *pool, size_t sz) +static struct intel_gt_buffer_pool_node * +node_create(struct intel_gt_buffer_pool *pool, size_t sz) { - struct intel_engine_cs *engine = to_engine(pool); - struct intel_engine_pool_node *node; + struct intel_gt *gt = to_gt(pool); + struct intel_gt_buffer_pool_node *node; struct drm_i915_gem_object *obj; node = kmalloc(sizeof(*node), @@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) node->pool = pool; i915_active_init(&node->active, pool_active, pool_retire); - obj = i915_gem_object_create_internal(engine->i915, sz); + obj = i915_gem_object_create_internal(gt->i915, sz); if (IS_ERR(obj)) { i915_active_fini(&node->active); kfree(node); @@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } -static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) { - if (intel_engine_is_virtual(engine)) - engine = intel_virtual_engine_get_sibling(engine, 0); - - GEM_BUG_ON(!engine); - return &engine->pool; -} - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) -{ - struct intel_engine_pool *pool = lookup_pool(engine); - struct intel_engine_pool_node *node; + struct intel_gt_buffer_pool *pool = >->buffer_pool; + struct intel_gt_buffer_pool_node *node; struct list_head *list; unsigned long flags; int ret; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - size = PAGE_ALIGN(size); list = bucket_for_size(pool, size); @@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) return node; } -void intel_engine_pool_init(struct intel_engine_pool *pool) +void intel_gt_init_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; spin_lock_init(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); + INIT_DELAYED_WORK(&pool->work, pool_free_work); } -void intel_engine_pool_park(struct intel_engine_pool *pool) +static void pool_free_imm(struct intel_gt_buffer_pool *pool) { int n; + spin_lock_irq(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct intel_gt_buffer_pool_node *node, *next; struct list_head *list = &pool->cache_list[n]; - struct intel_engine_pool_node *node, *nn; - list_for_each_entry_safe(node, nn, list, link) + list_for_each_entry_safe(node, next, list, link) node_free(node); - INIT_LIST_HEAD(list); } + 
spin_unlock_irq(&pool->lock); +} + +void intel_gt_flush_buffer_pool(struct intel_gt *gt) +{ + struct intel_gt_buffer_pool *pool = >->buffer_pool; + + if (cancel_delayed_work_sync(&pool->work)) + pool_free_imm(pool); } -void intel_engine_pool_fini(struct intel_engine_pool *pool) +void intel_gt_fini_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; + intel_gt_flush_buffer_pool(gt); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) GEM_BUG_ON(!list_empty(&pool->cache_list[n])); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h new file mode 100644 index 000000000000..42cbac003e8a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_GT_BUFFER_POOL_H +#define INTEL_GT_BUFFER_POOL_H + +#include <linux/types.h> + +#include "i915_active.h" +#include "intel_gt_buffer_pool_types.h" + +struct intel_gt; +struct i915_request; + +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size); + +static inline int +intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, + struct i915_request *rq) +{ + return i915_active_add_request(&node->active, rq); +} + +static inline void +intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node) +{ + i915_active_release(&node->active); +} + +void intel_gt_init_buffer_pool(struct intel_gt *gt); +void intel_gt_flush_buffer_pool(struct intel_gt *gt); +void intel_gt_fini_buffer_pool(struct intel_gt *gt); + +#endif /* INTEL_GT_BUFFER_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index e31ee361b76f..e28bdda771ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -4,26 +4,29 @@ * Copyright © 2014-2018 Intel Corporation */ -#ifndef INTEL_ENGINE_POOL_TYPES_H -#define INTEL_ENGINE_POOL_TYPES_H +#ifndef INTEL_GT_BUFFER_POOL_TYPES_H +#define INTEL_GT_BUFFER_POOL_TYPES_H #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> #include "i915_active_types.h" struct drm_i915_gem_object; -struct intel_engine_pool { +struct intel_gt_buffer_pool { spinlock_t lock; struct list_head cache_list[4]; + struct delayed_work work; }; -struct intel_engine_pool_node { +struct intel_gt_buffer_pool_node { struct i915_active active; struct drm_i915_gem_object *obj; struct list_head link; - struct intel_engine_pool *pool; + struct intel_gt_buffer_pool *pool; + unsigned long age; }; -#endif /* INTEL_ENGINE_POOL_TYPES_H */ +#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c new file mode 100644 index 000000000000..999079686846 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_clock_utils.h" + +#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */ +#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */ +#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */ + +static u32 read_clock_frequency(const struct intel_gt *gt) +{ + if (INTEL_GEN(gt->i915) >= 11) { + u32 config; + + config = intel_uncore_read(gt->uncore, RPM_CONFIG0); + config &= 
GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; + config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (config) { + case 0: return MHZ_12; + case 1: + case 2: return MHZ_19_2; + default: + case 3: return MHZ_12_5; + } + } else if (INTEL_GEN(gt->i915) >= 9) { + if (IS_GEN9_LP(gt->i915)) + return MHZ_19_2; + else + return MHZ_12; + } else { + return MHZ_12_5; + } +} + +void intel_gt_init_clock_frequency(struct intel_gt *gt) +{ + /* + * Note that on gen11+, the clock frequency may be reconfigured. + * We do not, and we assume nobody else does. + */ + gt->clock_frequency = read_clock_frequency(gt); + GT_TRACE(gt, + "Using clock frequency: %dkHz\n", + gt->clock_frequency / 1000); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_gt_check_clock_frequency(const struct intel_gt *gt) +{ + if (gt->clock_frequency != read_clock_frequency(gt)) { + dev_err(gt->i915->drm.dev, + "GT clock frequency changed, was %uHz, now %uHz!\n", + gt->clock_frequency, + read_clock_frequency(gt)); + } +} +#endif + +static u64 div_u64_roundup(u64 nom, u32 den) +{ + return div_u64(nom + den - 1, den); +} + +u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count) +{ + return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000), + gt->clock_frequency); +} + +u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count) +{ + return intel_gt_clock_interval_to_ns(gt, 16 * count); +} + +u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns) +{ + return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns), + 1000 * 1000 * 1000); +} + +u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns) +{ + u32 val; + + /* + * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS + * 8300) freezing up around GPU hangs. Looks as if even + * scheduling/timer interrupts start misbehaving if the RPS + * EI/thresholds are "bad", leading to a very sluggish or even + * frozen machine. 
+ */ + val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16); + if (IS_GEN(gt->i915, 6)) + val = roundup(val, 25); + + return val; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h new file mode 100644 index 000000000000..f793c89f2cbd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_GT_CLOCK_UTILS_H__ +#define __INTEL_GT_CLOCK_UTILS_H__ + +#include <linux/types.h> + +struct intel_gt; + +void intel_gt_init_clock_frequency(struct intel_gt *gt); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_gt_check_clock_frequency(const struct intel_gt *gt); +#else +static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {} +#endif + +u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count); +u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count); + +u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns); +u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns); + +#endif /* __INTEL_GT_CLOCK_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f0e7fd95165a..0cc7dd54f4f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } } + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); + ENGINE_TRACE(engine, "semaphore yield: %08x\n", + engine->execlists.yield); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; const u32 gt_interrupts[] = { irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 8b653c0f5e5f..6bdb434a442d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -12,6 +12,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_llc.h" @@ -138,6 +139,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_gt_check_clock_frequency(gt); + /* * As we have just resumed the machine and woken the device up from * deep PCI sleep (presumably D3_cold), assume the HW has been reset @@ -155,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force) intel_uc_reset_prepare(>->uc); + for_each_engine(engine, gt, id) + if (engine->sanitize) + engine->sanitize(engine); + if 
(reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); @@ -164,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.finish) engine->reset.finish(engine); + intel_rps_sanitize(>->rps); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(gt->uncore->rpm, wakeref); } @@ -191,11 +200,12 @@ int intel_gt_resume(struct intel_gt *gt) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + gt_sanitize(gt, true); + intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); - gt_sanitize(gt, true); if (intel_gt_is_wedged(gt)) { err = -EIO; goto out_fw; @@ -204,7 +214,7 @@ int intel_gt_resume(struct intel_gt *gt) /* Only when the HW is re-initialised, can we replay the requests */ err = intel_gt_init_hw(gt); if (err) { - dev_err(gt->i915->drm.dev, + drm_err(>->i915->drm, "Failed to initialize GPU, declaring it wedged!\n"); goto err_wedged; } @@ -220,7 +230,7 @@ int intel_gt_resume(struct intel_gt *gt) intel_engine_pm_put(engine); if (err) { - dev_err(gt->i915->drm.dev, + drm_err(>->i915->drm, "Failed to restart %s (%d)\n", engine->name, err); goto err_wedged; @@ -324,6 +334,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) { GT_TRACE(gt, "\n"); intel_gt_init_swizzling(gt); + intel_ggtt_restore_fences(gt->ggtt); return intel_uc_runtime_resume(>->uc); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 8a5054f21bf8..16ff47c83bd5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -26,6 +26,11 @@ static bool retire_requests(struct intel_timeline *tl) return !i915_active_fence_isset(&tl->last_request); } +static bool engine_active(const struct intel_engine_cs *engine) +{ + return !list_empty(&engine->kernel_context->timeline->requests); +} + static bool flush_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -37,8 +42,13 @@ static bool flush_submission(struct intel_gt *gt) for_each_engine(engine, gt, id) { intel_engine_flush_submission(engine); - active |= flush_work(&engine->retire_work); - active |= flush_work(&engine->wakeref.work); + + /* Flush the background retirement and idle barriers */ + flush_work(&engine->retire_work); + flush_delayed_work(&engine->wakeref.work); + + /* Is the idle barrier still outstanding? 
*/ + active |= engine_active(engine); } return active; @@ -147,25 +157,32 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) fence = i915_active_fence_get(&tl->last_request); if (fence) { + mutex_unlock(&tl->mutex); + timeout = dma_fence_wait_timeout(fence, interruptible, timeout); dma_fence_put(fence); + + /* Retirement is best effort */ + if (!mutex_trylock(&tl->mutex)) { + active_count++; + goto out_active; + } } } - if (!retire_requests(tl) || flush_submission(gt)) + if (!retire_requests(tl)) active_count++; + mutex_unlock(&tl->mutex); - spin_lock(&timelines->lock); +out_active: spin_lock(&timelines->lock); - /* Resume iteration after dropping lock */ + /* Resume list iteration after reacquiring spinlock */ list_safe_reset_next(tl, tn, link); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - mutex_unlock(&tl->mutex); - /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { GEM_BUG_ON(atomic_read(&tl->active_count)); @@ -177,6 +194,9 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); + if (flush_submission(gt)) /* Wait, there's more! */ + active_count++; + return active_count ? timeout : 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 96890dd12b5f..0cc1d6b185dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,6 +17,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_gt_buffer_pool_types.h" #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" @@ -61,6 +62,7 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ + ktime_t last_init_time; struct intel_reset reset; /** @@ -72,14 +74,12 @@ struct intel_gt { */ intel_wakeref_t awake; + u32 clock_frequency; + struct intel_llc llc; struct intel_rc6 rc6; struct intel_rps rps; - ktime_t last_init_time; - - struct i915_vma *scratch; - spinlock_t irq_lock; u32 gt_imr; u32 pm_ier; @@ -97,6 +97,18 @@ struct intel_gt { * Reserved for exclusive use by the kernel. */ struct i915_address_space *vm; + + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. + * + * Buffers older than 1s are periodically reaped from the pool, + * or may be reclaimed by the shrinker before then. 
+ */ + struct intel_gt_buffer_pool buffer_pool; + + struct i915_vma *scratch; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index bb9a6e638175..2a72cce63fd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -171,7 +171,9 @@ void __i915_vm_close(struct i915_address_space *vm) { struct i915_vma *vma, *vn; - mutex_lock(&vm->mutex); + if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex)) + return; + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; @@ -186,6 +188,7 @@ void __i915_vm_close(struct i915_address_space *vm) i915_gem_object_put(obj); } GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); } @@ -484,30 +487,6 @@ void gtt_write_workarounds(struct intel_gt *gt) } } -u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; - - if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; - - switch (level) { - case I915_CACHE_NONE: - pte |= PPAT_UNCACHED; - break; - case I915_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC; - break; - default: - pte |= PPAT_CACHED; - break; - } - - return pte; -} - static void tgl_setup_private_ppat(struct intel_uncore *uncore) { /* TGL doesn't support LLC or AGE settings */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 23004445806a..d93ebdf3fa0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -26,7 +26,6 @@ #include <drm/drm_mm.h> #include "gt/intel_reset.h" -#include "i915_gem_fence_reg.h" #include "i915_selftest.h" #include "i915_vma_types.h" @@ -135,6 +134,8 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) +struct i915_fence_reg; + #define for_each_sgt_daddr(__dp, __iter, __sgt) \ __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) @@ -333,7 +334,7 @@ struct i915_ggtt { u32 pin_bias; unsigned int num_fences; - struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; + struct i915_fence_reg *fence_regs; struct list_head fence_list; /** @@ -429,8 +430,7 @@ static inline void i915_vm_close(struct i915_address_space *vm) { GEM_BUG_ON(!atomic_read(&vm->open)); - if (atomic_dec_and_test(&vm->open)) - __i915_vm_close(vm); + __i915_vm_close(vm); i915_vm_put(vm); } @@ -515,10 +515,6 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); -u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags); - int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ba31cbe8c68e..87e6c5bdd2dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -147,6 +147,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "intel_workarounds.h" +#include "shmem_utils.h" #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) @@ -216,7 +217,7 @@ struct virtual_engine { /* And finally, which physical engines this virtual engine maps onto. 
*/ unsigned int num_siblings; - struct intel_engine_cs *siblings[0]; + struct intel_engine_cs *siblings[]; }; static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) @@ -238,6 +239,123 @@ __execlists_update_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 head); +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static int lrc_ring_gpr0(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x74; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x68; + else if (engine->class == RENDER_CLASS) + return 0xd8; + else + return -1; +} + +static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x12; + else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) + return 0x18; + else + return -1; +} + +static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_wa_bb_per_ctx(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_indirect_ptr(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) +{ + if (engine->class != RENDER_CLASS) + return -1; + + if (INTEL_GEN(engine->i915) >= 12) + return 0xb6; + else if (INTEL_GEN(engine->i915) >= 11) + return 0xaa; + else + return -1; +} + +static u32 +lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + fallthrough; + case 12: + return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 11: + return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 10: + return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 9: + return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 8: + return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + } +} + +static void +lrc_ring_setup_indirect_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr, + u32 size) +{ + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); + GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); + regs[lrc_ring_indirect_ptr(engine) + 1] = + ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); + + GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); + regs[lrc_ring_indirect_offset(engine) + 1] = + lrc_ring_indirect_offset_default(engine) << 6; +} + +static u32 intel_context_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. 
+ */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + static void mark_eio(struct i915_request *rq) { if (i915_request_completed(rq)) @@ -245,7 +363,7 @@ static void mark_eio(struct i915_request *rq) GEM_BUG_ON(i915_request_signaled(rq)); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -293,7 +411,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority; + return READ_ONCE(rq->sched.attr.priority); } static int effective_prio(const struct i915_request *rq) @@ -311,18 +429,7 @@ static int effective_prio(const struct i915_request *rq) if (i915_request_has_nopreempt(rq)) prio = I915_PRIORITY_UNPREEMPTABLE; - /* - * On unwinding the active request, we give it a priority bump - * if it has completed waiting on any semaphore. If we know that - * the request has already started, we can prevent an unwanted - * preempt-to-idle cycle by taking that into account now. - */ - if (__i915_request_has_started(rq)) - prio |= I915_PRIORITY_NOSEMAPHORE; - - /* Restrict mere WAIT boosts from triggering preemption */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ - return prio | __NO_PREEMPTION; + return prio; } static int queue_prio(const struct intel_engine_execlists *execlists) @@ -456,10 +563,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -470,21 +577,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. 
- */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -503,7 +596,7 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END(x) 0, (x) +#define END(total_state_size) 0, (total_state_size) { const u32 base = engine->mmio_base; @@ -526,7 +619,7 @@ static void set_offsets(u32 *regs, if (flags & POSTED) *regs |= MI_LRI_FORCE_POSTED; if (INTEL_GEN(engine->i915) >= 11) - *regs |= MI_LRI_CS_MMIO; + *regs |= MI_LRI_LRM_CS_MMIO; regs++; GEM_BUG_ON(!count); @@ -911,8 +1004,63 @@ static const u8 gen12_rcs_offsets[] = { NOP(6), LRI(1, 0), REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), - END(80) + END(192) }; #undef END @@ -1004,7 +1152,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } - rq->engine = owner; + WRITE_ONCE(rq->engine, owner); owner->submit_request(rq); active = NULL; } @@ -1040,17 +1188,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine) { unsigned long flags; - if (READ_ONCE(engine->stats.enabled) == 0) + if (atomic_add_unless(&engine->stats.active, 1, 0)) return; write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); + if (!atomic_add_unless(&engine->stats.active, 1, 0)) { + engine->stats.start = ktime_get(); + atomic_inc(&engine->stats.active); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); } @@ -1058,51 +1203,20 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) { unsigned long flags; - if (READ_ONCE(engine->stats.enabled) == 0) + GEM_BUG_ON(!atomic_read(&engine->stats.active)); + + if (atomic_add_unless(&engine->stats.active, -1, 1)) return; write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. 
- */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } + if (atomic_dec_and_test(&engine->stats.active)) { + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); } -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; -} - static void execlists_check_context(const struct intel_context *ce, const struct intel_engine_cs *engine) @@ -1146,14 +1260,12 @@ execlists_check_context(const struct intel_context *ce, static void restore_default_state(struct intel_context *ce, struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; + u32 *regs; - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); + regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); + execlists_init_reg_state(regs, ce, engine, ce->ring, true); - execlists_init_reg_state(regs, ce, engine, ce->ring, false); + ce->runtime.last = intel_context_get_runtime(ce); } static void reset_active(struct i915_request *rq, @@ -1192,18 +1304,7 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - -static u32 intel_context_get_runtime(const struct intel_context *ce) -{ - /* - * We can use either ppHWSP[16] which is recorded before the context - * switch (and so excludes the cost of context switches) or use the - * value from the context image itself, which is saved/restored earlier - * and so includes the cost of the save. 
- */ - return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) @@ -1251,18 +1352,23 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); - ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1302,7 +1408,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1316,10 +1423,18 @@ __execlists_schedule_out(struct i915_request *rq, * If we have just completed this context, the engine may now be * idle and we want to re-enter powersaving. */ - if (list_is_last(&rq->link, &ce->timeline->requests) && + if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -1345,15 +1460,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? 
ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -1361,7 +1478,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1400,7 +1517,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1415,6 +1532,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static __maybe_unused char * +dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) +{ + if (!rq) + return ""; + + snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", + prefix, + rq->context->lrc.ccid, + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + rq_prio(rq)); + + return buf; +} + static __maybe_unused void trace_ports(const struct intel_engine_execlists *execlists, const char *msg, @@ -1422,18 +1557,14 @@ trace_ports(const struct intel_engine_execlists *execlists, { const struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + char __maybe_unused p0[40], p1[40]; if (!ports[0]) return; - ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg, - ports[0]->fence.context, - ports[0]->fence.seqno, - i915_request_completed(ports[0]) ? "!" : - i915_request_started(ports[0]) ? "*" : - "", - ports[1] ? ports[1]->fence.context : 0, - ports[1] ? ports[1]->fence.seqno : 0); + ENGINE_TRACE(engine, "%s { %s%s }\n", msg, + dump_port(p0, sizeof(p0), "", ports[0]), + dump_port(p1, sizeof(p1), ", ", ports[1])); } static inline bool @@ -1446,8 +1577,12 @@ static __maybe_unused bool assert_pending_valid(const struct intel_engine_execlists *execlists, const char *msg) { + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); struct i915_request * const *port, *rq; struct intel_context *ce = NULL; + bool sentinel = false; + u32 ccid = -1; trace_ports(execlists, msg, execlists->pending); @@ -1456,13 +1591,14 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, return true; if (!execlists->pending[0]) { - GEM_TRACE_ERR("Nothing pending for promotion!\n"); + GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", + engine->name); return false; } if (execlists->pending[execlists_num_ports(execlists)]) { - GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", - execlists_num_ports(execlists)); + GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", + engine->name, execlists_num_ports(execlists)); return false; } @@ -1474,13 +1610,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, GEM_BUG_ON(!i915_request_is_active(rq)); if (ce == rq->context) { - GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); return false; } ce = rq->context; + if (ccid == ce->lrc.ccid) { + GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", + engine->name, + ccid, ce->timeline->fence_context, + port - execlists->pending); + return false; + } + ccid = ce->lrc.ccid; + + /* + * Sentinels are supposed to be lonely so they flush the + * current exection off the HW. 
Check that they are the + * only request in the pending submission. + */ + if (sentinel) { + GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + + sentinel = i915_request_has_sentinel(rq); + if (sentinel && port != execlists->pending) { + GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + /* Hold tightly onto the lock to prevent concurrent retires! */ if (!spin_trylock_irqsave(&rq->lock, flags)) continue; @@ -1490,7 +1658,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, if (i915_active_is_idle(&ce->active) && !intel_context_is_barrier(ce)) { - GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1498,7 +1667,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } if (!i915_vma_is_pinned(ce->state)) { - GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1506,7 +1676,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } if (!i915_vma_is_pinned(ce->ring->vma)) { - GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", + GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", + engine->name, ce->timeline->fence_context, port - execlists->pending); ok = false; @@ -1576,6 +1747,11 @@ static bool can_merge_ctx(const struct intel_context *prev, return true; } +static unsigned long i915_request_flags(const struct i915_request *rq) +{ + return READ_ONCE(rq->fence.flags); +} + static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { @@ -1593,7 +1769,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->fence.flags ^ next->fence.flags) & + if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & (BIT(I915_FENCE_FLAG_NOPREEMPT) | BIT(I915_FENCE_FLAG_SENTINEL)))) return false; @@ -1601,6 +1777,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (!can_merge_ctx(prev->context, next->context)) return false; + GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); return true; } @@ -1635,31 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct intel_engine_cs *engine) +static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) { - struct intel_engine_cs *old = ve->siblings[0]; - - /* All unattached (rq->engine == old) must already be completed */ - - spin_lock(&old->breadcrumbs.irq_lock); - if (!list_empty(&ve->context.signal_link)) { - list_move_tail(&ve->context.signal_link, - &engine->breadcrumbs.signalers); - intel_engine_signal_breadcrumbs(engine); - } - spin_unlock(&old->breadcrumbs.irq_lock); -} - -static struct i915_request * -last_active(const struct intel_engine_execlists *execlists) -{ - struct i915_request * const *last = READ_ONCE(execlists->active); - - while (*last && i915_request_completed(*last)) - last++; - - return *last; + /* + * All the outstanding signals on ve->siblings[0] must have + * been completed, just pending the 
interrupt handler. As those + * signals still refer to the old sibling (via rq->engine), we must + * transfer those to the old irq_worker to keep our locking + * consistent. + */ + intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); } #define for_each_waiter(p__, rq__) \ @@ -1668,9 +1830,9 @@ last_active(const struct intel_engine_execlists *execlists) wait_link) #define for_each_signaler(p__, rq__) \ - list_for_each_entry_lockless(p__, \ - &(rq__)->sched.signalers_list, \ - signal_link) + list_for_each_entry_rcu(p__, \ + &(rq__)->sched.signalers_list, \ + signal_link) static void defer_request(struct i915_request *rq, struct list_head * const pl) { @@ -1693,12 +1855,16 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_started(w) && + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && + i915_request_started(w) && !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); @@ -1728,22 +1894,47 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; if (!intel_engine_has_timeslices(engine)) return false; - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return false; - - hint = max(rq_prio(list_next_entry(rq, sched.link)), - engine->execlists.queue_priority_hint); + hint = engine->execlists.queue_priority_hint; + if (!list_is_last(&rq->sched.link, &engine->active.requests)) + hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); return hint >= effective_prio(rq); } +static bool +timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice (i.e. + * until it is scheduled out and replaced by a new submission, + * possibly even its own lite-restore). The HW only sends an interrupt + * on the first miss, and we do know if that semaphore has been + * signaled, or even if it is now stuck on another semaphore. Play + * safe, yield if it might be stuck -- it will be given a fresh + * timeslice in the near future. 
+ */ + return rq->context->lrc.ccid == READ_ONCE(el->yield); +} + +static bool +timeslice_expired(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1759,15 +1950,15 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *engine->execlists.active; + const struct intel_engine_execlists *execlists = &engine->execlists; + const struct i915_request *rq = *execlists->active; if (!rq || i915_request_completed(rq)) return 0; - if (engine->execlists.switch_priority_hint < effective_prio(rq)) + if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) return 0; return timeslice(engine); @@ -1775,10 +1966,39 @@ active_timeslice(const struct intel_engine_cs *engine) static void set_timeslice(struct intel_engine_cs *engine) { + unsigned long duration; + if (!intel_engine_has_timeslices(engine)) return; - set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); + duration = active_timeslice(engine); + ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration); + + set_timer_ms(&engine->execlists.timer, duration); +} + +static void start_timeslice(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + const int prio = queue_prio(execlists); + unsigned long duration; + + if (!intel_engine_has_timeslices(engine)) + return; + + WRITE_ONCE(execlists->switch_priority_hint, prio); + if (prio == INT_MIN) + return; + + if (timer_pending(&execlists->timer)) + return; + + duration = timeslice(engine); + ENGINE_TRACE(engine, + "start timeslicing, prio:%d, interval:%lu", + prio, duration); + + set_timer_ms(&execlists->timer, duration); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1786,11 +2006,9 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } -static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { - struct i915_request *rq; - - rq = last_active(&engine->execlists); if (!rq) return 0; @@ -1801,13 +2019,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) return READ_ONCE(engine->props.preempt_timeout_ms); } -static void set_preempt_timeout(struct intel_engine_cs *engine) +static void set_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { if (!intel_engine_has_preempt_reset(engine)) return; set_timer_ms(&engine->execlists.preempt, - active_preempt_timeout(engine)); + active_preempt_timeout(engine, rq)); } static inline void clear_ports(struct i915_request **ports, int count) @@ -1820,6 +2039,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request * const *active; struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -1874,9 +2094,27 @@ static void execlists_dequeue(struct 
intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. */ - last = last_active(execlists); - if (last) { + active = READ_ONCE(execlists->active); + + /* + * In theory we can skip over completed contexts that have not + * yet been processed by events (as those events are in flight): + * + * while ((last = *active) && i915_request_completed(last)) + * active++; + * + * However, the GPU cannot handle this as it will ultimately + * find itself trying to jump back into a context it has just + * completed and barf. + */ + + if ((last = *active)) { if (need_preempt(engine, last, rb)) { + if (i915_request_completed(last)) { + tasklet_hi_schedule(&execlists->tasklet); + return; + } + ENGINE_TRACE(engine, "preempting last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -1903,13 +2141,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(execlists, last)) { + if (i915_request_completed(last)) { + tasklet_hi_schedule(&execlists->tasklet); + return; + } + ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d\n", + "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); ring_set_paused(engine, 1); defer_active(engine); @@ -1944,11 +2188,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - if (!execlists->timer.expires && - need_timeslice(engine, last)) - set_timer_ms(&execlists->timer, - timeslice(engine)); - + start_timeslice(engine); return; } } @@ -1983,7 +2223,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this for another */ + start_timeslice(engine); + return; /* leave this for another sibling */ } ENGINE_TRACE(engine, @@ -1995,13 +2236,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) "", yesno(engine != ve->siblings[0])); - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; + WRITE_ONCE(ve->request, NULL); + WRITE_ONCE(ve->base.execlists.queue_priority_hint, + INT_MIN); rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - rq->engine = engine; + WRITE_ONCE(rq->engine, engine); if (engine != ve->siblings[0]) { u32 *regs = ve->context.lrc_reg_state; @@ -2014,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, engine); + virtual_xfer_breadcrumbs(ve); /* * Move the bound engine to the top of the list @@ -2121,6 +2363,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last && !can_merge_ctx(last->context, rq->context)); + GEM_BUG_ON(last && + i915_seqno_passed(last->fence.seqno, + rq->fence.seqno)); submit = true; last = rq; @@ -2159,7 +2404,7 @@ done: * Skip if we ended up with exactly the same set of requests, * e.g. 
trying to timeslice a pair of ordered contexts */ - if (!memcmp(execlists->active, execlists->pending, + if (!memcmp(active, execlists->pending, (port - execlists->pending + 1) * sizeof(*port))) { do execlists_schedule_out(fetch_and_zero(port)); @@ -2169,8 +2414,9 @@ done: } clear_ports(port + 1, last_port - port); + WRITE_ONCE(execlists->yield, -1); + set_preempt_timeout(engine, *active); execlists_submit_ports(engine); - set_preempt_timeout(engine); } else { skip_submit: ring_set_paused(engine, 0); @@ -2191,6 +2437,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) execlists_schedule_out(*port); clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + smp_wmb(); /* complete the seqlock for execlists_active() */ WRITE_ONCE(execlists->active, execlists->inflight); } @@ -2339,12 +2586,11 @@ static void process_csb(struct intel_engine_cs *engine) if (promote) { struct i915_request * const *old = execlists->active; - GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - ring_set_paused(engine, 0); /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); + smp_wmb(); /* notify execlists_active() */ /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", old); @@ -2352,11 +2598,13 @@ static void process_csb(struct intel_engine_cs *engine) execlists_schedule_out(*old++); /* switch pending to inflight */ - WRITE_ONCE(execlists->active, - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending))); + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); + smp_wmb(); /* complete the seqlock */ + WRITE_ONCE(execlists->active, execlists->inflight); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -2369,17 +2617,21 @@ static void process_csb(struct intel_engine_cs *engine) * We rely on the hardware being strongly * ordered, that the breadcrumb write is * coherent (visible from the CPU) before the - * user interrupt and CSB is processed. + * user interrupt is processed. One might assume + * that the breadcrumb write being before the + * user interrupt and the CS event for the context + * switch would therefore be before the CS event + * itself... */ if (GEM_SHOW_DEBUG() && - !i915_request_completed(*execlists->active) && - !reset_in_progress(execlists)) { - struct i915_request *rq __maybe_unused = - *execlists->active; + !i915_request_completed(*execlists->active)) { + struct i915_request *rq = *execlists->active; const u32 *regs __maybe_unused = rq->context->lrc_reg_state; ENGINE_TRACE(engine, + "context completed before request!\n"); + ENGINE_TRACE(engine, "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", ENGINE_READ(engine, RING_START), ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, @@ -2398,8 +2650,6 @@ static void process_csb(struct intel_engine_cs *engine) regs[CTX_RING_START], regs[CTX_RING_HEAD], regs[CTX_RING_TAIL]); - - GEM_BUG_ON("context completed before request"); } execlists_schedule_out(*execlists->active++); @@ -2533,11 +2783,13 @@ unlock: static bool hold_request(const struct i915_request *rq) { struct i915_dependency *p; + bool result = false; /* * If one of our ancestors is on hold, we must also be on hold, * otherwise we will bypass it and execute before it. 
*/ + rcu_read_lock(); for_each_signaler(p, rq) { const struct i915_request *s = container_of(p->signaler, typeof(*s), sched); @@ -2545,11 +2797,13 @@ static bool hold_request(const struct i915_request *rq) if (s->engine != rq->engine) continue; - if (i915_request_on_hold(s)) - return true; + result = i915_request_on_hold(s); + if (result) + break; } + rcu_read_unlock(); - return false; + return result; } static void __execlists_unhold(struct i915_request *rq) @@ -2575,6 +2829,10 @@ static void __execlists_unhold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + /* Propagate any change in error status */ + if (rq->fence.error) + i915_request_set_error_once(w, rq->fence.error); + if (w->engine != rq->engine) continue; @@ -2681,6 +2939,45 @@ err_cap: return NULL; } +static struct i915_request * +active_context(struct intel_engine_cs *engine, u32 ccid) +{ + const struct intel_engine_execlists * const el = &engine->execlists; + struct i915_request * const *port, *rq; + + /* + * Use the most recent result from process_csb(), but just in case + * we trigger an error (via interrupt) before the first CS event has + * been written, peek at the next submission. + */ + + for (port = el->active; (rq = *port); port++) { + if (rq->context->lrc.ccid == ccid) { + ENGINE_TRACE(engine, + "ccid found at active:%zd\n", + port - el->active); + return rq; + } + } + + for (port = el->pending; (rq = *port); port++) { + if (rq->context->lrc.ccid == ccid) { + ENGINE_TRACE(engine, + "ccid found at pending:%zd\n", + port - el->pending); + return rq; + } + } + + ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); + return NULL; +} + +static u32 active_ccid(struct intel_engine_cs *engine) +{ + return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); +} + static bool execlists_capture(struct intel_engine_cs *engine) { struct execlists_capture *cap; @@ -2698,7 +2995,7 @@ static bool execlists_capture(struct intel_engine_cs *engine) return true; spin_lock_irq(&engine->active.lock); - cap->rq = execlists_active(&engine->execlists); + cap->rq = active_context(engine, active_ccid(engine)); if (cap->rq) { cap->rq = active_request(cap->rq->context->timeline, cap->rq); cap->rq = i915_request_get_rcu(cap->rq); @@ -2846,10 +3143,14 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) if (reset_in_progress(execlists)) return; /* defer until we restart the engine following reset */ - if (execlists->tasklet.func == execlists_submission_tasklet) - __execlists_submission_tasklet(engine); - else - tasklet_hi_schedule(&execlists->tasklet); + /* Hopefully we clear execlists->pending[] to let us through */ + if (READ_ONCE(execlists->pending[0]) && + tasklet_trylock(&execlists->tasklet)) { + process_csb(engine); + tasklet_unlock(&execlists->tasklet); + } + + __execlists_submission_tasklet(engine); } static void submit_queue(struct intel_engine_cs *engine, @@ -2935,19 +3236,139 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) vaddr += engine->context_size; if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) - dev_err_once(engine->i915->drm.dev, + drm_err_once(&engine->i915->drm, "%s context redzone overwritten!\n", engine->name); } static void execlists_context_unpin(struct intel_context *ce) { - check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); i915_gem_object_unpin_map(ce->state->obj); } +static u32 * +gen12_emit_timestamp_wa(const struct intel_context *ce, 
u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + CTX_TIMESTAMP * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + return cs; +} + +static u32 * +gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + return cs; +} + +static u32 * +gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) +{ + cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_cmd_buf_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) +{ + cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static inline u32 context_wa_bb_offset(const struct intel_context *ce) +{ + return PAGE_SIZE * ce->wa_bb_page; +} + +static u32 *context_indirect_bb(const struct intel_context *ce) +{ + void *ptr; + + GEM_BUG_ON(!ce->wa_bb_page); + + ptr = ce->lrc_reg_state; + ptr -= LRC_STATE_OFFSET; /* back to start of context image */ + ptr += context_wa_bb_offset(ce); + + return ptr; +} + +static void +setup_indirect_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + u32 * const start = context_indirect_bb(ce); + u32 *cs; + + cs = emit(ce, start); + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + while ((unsigned long)cs % CACHELINE_BYTES) + *cs++ = MI_NOOP; + + lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, + i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce), + (cs - start) * sizeof(*cs)); +} + static void __execlists_update_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -2962,6 +3383,7 @@ __execlists_update_reg_state(const struct intel_context *ce, regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); regs[CTX_RING_HEAD] = head; regs[CTX_RING_TAIL] = ring->tail; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; /* RPCS */ if (engine->class == RENDER_CLASS) { @@ -2970,6 +3392,18 @@ __execlists_update_reg_state(const struct intel_context *ce, 
i915_oa_init_reg_state(ce, engine); } + + if (ce->wa_bb_page) { + u32 *(*fn)(const struct intel_context *ce, u32 *cs); + + fn = gen12_emit_indirect_ctx_xcs; + if (ce->engine->class == RENDER_CLASS) + fn = gen12_emit_indirect_ctx_rcs; + + /* Mutually exclusive wrt to global indirect bb */ + GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); + setup_indirect_ctx_bb(ce, engine, fn); + } } static int @@ -2987,8 +3421,8 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; - ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; __execlists_update_reg_state(ce, engine, ce->ring->tail); return 0; @@ -3016,7 +3450,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -3036,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); if (!i915_request_timeline(rq)->has_initial_breadcrumb) return 0; @@ -3062,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) /* Record the updated position of the request's payload */ rq->infix = intel_ring_offset(rq, cs); + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + + return 0; +} + +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + return 0; } @@ -3086,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request) * to cancel/unwind this request now. */ + if (!i915_vm_is_4lvl(request->context->vm)) { + ret = emit_pdps(request); + if (ret) + return ret; + } + /* Unconditionally invalidate GPU caches and TLBs. 
*/ ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) @@ -3386,7 +3877,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) ret = lrc_setup_wa_ctx(engine); if (ret) { - DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); + drm_dbg(&engine->i915->drm, + "Failed to setup context WA page: %d\n", ret); return ret; } @@ -3419,6 +3911,72 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static void reset_csb_pointers(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + const unsigned int reset_value = execlists->csb_size - 1; + + ring_set_paused(engine, 0); + + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + /* + * After a reset, the HW starts writing into CSB entry [0]. We + * therefore have to set our HEAD pointer back one entry so that + * the *first* entry we check is entry 0. To complicate this further, + * as we don't wait for the first interrupt after reset, we have to + * fake the HW write to point back to the last entry so that our + * inline comparison of our cached head position against the last HW + * write works even before the first interrupt. + */ + execlists->csb_head = reset_value; + WRITE_ONCE(*execlists->csb_write, reset_value); + wmb(); /* Make sure this is visible to HW (paranoia?) */ + + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); + + /* Once more for luck and our trusty paranoia */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); +} + +static void execlists_sanitize(struct intel_engine_cs *engine) +{ + /* + * Poison residual state on resume, in case the suspend didn't! + * + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. + */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); + + reset_csb_pointers(engine); + + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. 
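As the comment in reset_csb_pointers() explains, the hardware restarts CSB writes at entry 0 after a reset, so the driver parks its cached head at the last slot: the next increment wraps to 0 and the first event is not skipped. A minimal illustration of that wrap (the 12-entry size is just an example matching GEN11_CSB_ENTRIES):

#include <stdio.h>

int main(void)
{
	const unsigned int csb_size = 12;	/* e.g. GEN11_CSB_ENTRIES */
	unsigned int head = csb_size - 1;	/* reset_value */

	for (int ev = 0; ev < 3; ev++) {
		head = (head + 1) % csb_size;
		printf("event %d read from CSB[%u]\n", ev, head);
	}
	return 0;
}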
+ */ + intel_timeline_reset_seqno(engine->kernel_context->timeline); + + /* And scrub the dirty cachelines for the HWSP */ + clflush_cache_range(engine->status_page.addr, PAGE_SIZE); +} + static void enable_error_interrupt(struct intel_engine_cs *engine) { u32 status; @@ -3429,7 +3987,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine) status = ENGINE_READ(engine, RING_ESR); if (unlikely(status)) { - dev_err(engine->i915->drm.dev, + drm_err(&engine->i915->drm, "engine '%s' resumed still in error: %08x\n", engine->name, status); __intel_gt_reset(engine->gt, engine->mask); @@ -3485,7 +4043,7 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); - engine->context_tag = 0; + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) @@ -3493,7 +4051,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) bool unexpected = false; if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { - DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n"); + drm_dbg(&engine->i915->drm, + "STOP_RING still set in RING_MI_MODE\n"); unexpected = true; } @@ -3553,39 +4112,10 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * * FIXME: Wa for more modern gens needs to be validated */ + ring_set_paused(engine, 1); intel_engine_stop_cs(engine); -} - -static void reset_csb_pointers(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - const unsigned int reset_value = execlists->csb_size - 1; - - ring_set_paused(engine, 0); - - /* - * After a reset, the HW starts writing into CSB entry [0]. We - * therefore have to set our HEAD pointer back one entry so that - * the *first* entry we check is entry 0. To complicate this further, - * as we don't wait for the first interrupt after reset, we have to - * fake the HW write to point back to the last entry so that our - * inline comparison of our cached head position against the last HW - * write works even before the first interrupt. - */ - execlists->csb_head = reset_value; - WRITE_ONCE(*execlists->csb_write, reset_value); - wmb(); /* Make sure this is visible to HW (paranoia?) */ - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ - ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - reset_value << 8 | reset_value); - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); + engine->execlists.reset_ccid = active_ccid(engine); } static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) @@ -3628,13 +4158,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * its request, it was still running at the time of the * reset and will have been clobbered. */ - rq = execlists_active(execlists); + rq = active_context(engine, engine->execlists.reset_ccid); if (!rq) goto unwind; - /* We still have requests in-flight; the engine should be active */ - GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); @@ -3644,8 +4171,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) goto out_replay; } + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + /* Context has requests still in-flight; it should not be idle! 
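The change to enable_execlists() seeds engine->context_tag with GENMASK(BITS_PER_LONG - 2, 0). Reading it as a bitmap of free context tags (a set bit meaning the tag is available, allocated with find-first-set elsewhere in the submission path), a hedged standalone sketch of that allocation scheme looks like the following; the builtin is GCC/Clang specific and the interpretation is an assumption, not shown in this hunk.

#include <stdio.h>

static unsigned long tags;	/* bitmap of free tags */

static int get_tag(void)
{
	if (!tags)
		return -1;
	int tag = __builtin_ctzl(tags);	/* lowest free tag */
	tags &= ~(1ul << tag);
	return tag;
}

static void put_tag(int tag)
{
	tags |= 1ul << tag;
}

int main(void)
{
	tags = ~0ul >> 1;	/* GENMASK(BITS_PER_LONG - 2, 0) */

	int a = get_tag(), b = get_tag();
	printf("allocated %d and %d\n", a, b);
	put_tag(a);
	printf("next: %d\n", get_tag());	/* reuses the freed tag */
	return 0;
}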
*/ GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); head = intel_ring_wrap(ce->ring, rq->head); GEM_BUG_ON(head == ce->ring->tail); @@ -3677,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * image back to the expected values to skip over the guilty request. */ __i915_request_reset(rq, stalled); - if (!stalled) - goto out_replay; /* * We want a simple context + ring to execute the breadcrumb update. @@ -3688,15 +4217,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - restore_default_state(ce, engine); - out_replay: ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -3719,7 +4245,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void nop_submission_tasklet(unsigned long data) { + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + /* The driver is wedged; don't process any more events. */ + WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) @@ -4053,6 +4582,42 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +{ + static const i915_reg_t vd[] = { + GEN12_VD0_AUX_NV, + GEN12_VD1_AUX_NV, + GEN12_VD2_AUX_NV, + GEN12_VD3_AUX_NV, + }; + + static const i915_reg_t ve[] = { + GEN12_VE0_AUX_NV, + GEN12_VE1_AUX_NV, + }; + + if (engine->class == VIDEO_DECODE_CLASS) + return vd[engine->instance]; + + if (engine->class == VIDEO_ENHANCEMENT_CLASS) + return ve[engine->instance]; + + GEM_BUG_ON("unknown aux_inv_reg\n"); + + return INVALID_MMIO_REG; +} + +static u32 * +gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(inv_reg); + *cs++ = AUX_INV; + *cs++ = MI_NOOP; + + return cs; +} + static int gen12_emit_flush_render(struct i915_request *request, u32 mode) { @@ -4061,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request, u32 *cs; flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -4078,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -4093,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= 
PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_CS_STALL; - cs = intel_ring_begin(request, 8); + cs = intel_ring_begin(request, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -4113,29 +4679,62 @@ static int gen12_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + *cs++ = preparser_disable(false); intel_ring_advance(request, cs); + } - /* - * Wa_1604544889:tgl - */ - if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { - flags = 0; - flags |= PIPE_CONTROL_CS_STALL; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + return 0; +} - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; +static int gen12_emit_flush(struct i915_request *request, u32 mode) +{ + intel_engine_mask_t aux_inv = 0; + u32 cmd, *cs; + + if (mode & EMIT_INVALIDATE) + aux_inv = request->engine->mask & ~BIT(BCS0); + + cs = intel_ring_begin(request, + 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0)); + if (IS_ERR(cs)) + return PTR_ERR(cs); - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); + cmd = MI_FLUSH_DW + 1; + + /* We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - cs = gen8_emit_pipe_control(cs, flags, - LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (request->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + + if (aux_inv) { /* hsdes: 1809175790 */ + struct intel_engine_cs *engine; + unsigned int tmp; + + *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); + for_each_engine_masked(engine, request->engine->gt, + aux_inv, tmp) { + *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); + *cs++ = AUX_INV; } + *cs++ = MI_NOOP; } + intel_ring_advance(request, cs); return 0; } @@ -4169,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen8_emit_fini_breadcrumb_footer(struct i915_request *request, - u32 *cs) +gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4184,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request, return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); + u32 addr = i915_request_active_timeline(request)->hwsp_offset; - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0); +} + +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) @@ -4209,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request 
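The dword budget gen12_emit_flush() reserves, 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0), covers the MI_FLUSH_DW packet plus, when aux-table invalidation is needed, one LRI header, two dwords (register offset and AUX_INV value) per engine in the mask, and a trailing NOOP. A small self-contained check of that sizing arithmetic (the masks passed in main are hypothetical):

#include <stdio.h>

static unsigned int flush_dwords(unsigned char aux_inv_mask)
{
	unsigned int n = 4;	/* MI_FLUSH_DW + addr lo/hi + value */

	if (aux_inv_mask)
		n += 2 * __builtin_popcount(aux_inv_mask) + 2;
	return n;
}

int main(void)
{
	printf("no aux inv: %u dwords\n", flush_dwords(0x00));
	printf("two engines: %u dwords\n", flush_dwords(0x06));
	return 0;
}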
*request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } static u32 * @@ -4225,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } /* @@ -4263,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4277,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); - - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 * gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_HDC_PIPELINE_FLUSH); + cs = gen12_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(request, cs); } static void execlists_park(struct intel_engine_cs *engine) @@ -4329,8 +4925,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) @@ -4352,6 +4951,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine) static void execlists_release(struct intel_engine_cs *engine) { + engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + execlists_shutdown(engine); intel_engine_cleanup_common(engine); @@ -4371,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - if (INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) { 
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; - + engine->emit_flush = gen12_emit_flush; + } engine->set_default_submission = intel_execlists_set_default_submission; if (INTEL_GEN(engine->i915) < 11) { @@ -4409,6 +5011,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) @@ -4453,7 +5056,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) * because we only expect rare glitches but nothing * critical to prevent us from using GPU */ - DRM_ERROR("WA batch buffer initialization failed\n"); + drm_err(&i915->drm, "WA batch buffer initialization failed\n"); if (HAS_LOGICAL_RING_ELSQ(i915)) { execlists->submit_reg = uncore->regs + @@ -4476,48 +5079,18 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; - reset_csb_pointers(engine); + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } /* Finally, take ownership and responsibility for cleanup! */ + engine->sanitize = execlists_sanitize; engine->release = execlists_release; return 0; } -static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) -{ - u32 indirect_ctx_offset; - - switch (INTEL_GEN(engine->i915)) { - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - /* fall through */ - case 12: - indirect_ctx_offset = - GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 11: - indirect_ctx_offset = - GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 10: - indirect_ctx_offset = - GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 9: - indirect_ctx_offset = - GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - case 8: - indirect_ctx_offset = - GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - break; - } - - return indirect_ctx_offset; -} - - static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, const struct intel_ring *ring, @@ -4535,30 +5108,27 @@ static void init_common_reg_state(u32 * const regs, regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_TIMESTAMP] = 0; } static void init_wa_bb_reg_state(u32 * const regs, - const struct intel_engine_cs *engine, - u32 pos_bb_per_ctx) + const struct intel_engine_cs *engine) { const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; if (wa_ctx->per_ctx.size) { const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - regs[pos_bb_per_ctx] = + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } if (wa_ctx->indirect_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[pos_bb_per_ctx + 2] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[pos_bb_per_ctx + 4] = - intel_lr_indirect_ctx_offset(engine) << 6; + lrc_ring_setup_indirect_ctx(regs, engine, + i915_ggtt_offset(wa_ctx->vma) + + wa_ctx->indirect_ctx.offset, + wa_ctx->indirect_ctx.size); } } @@ -4607,10 +5177,7 @@ static void execlists_init_reg_state(u32 *regs, init_common_reg_state(regs, 
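The new ccid initialisation in intel_execlists_submission_setup() packs the engine class and instance into the upper 32 bits of the context ID by reusing the descriptor shifts minus 32. A hedged sketch of that bit packing; the shift values below are illustrative stand-ins for GEN11_ENGINE_{CLASS,INSTANCE}_SHIFT, not the authoritative definitions.

#include <stdint.h>
#include <stdio.h>

#define ENGINE_INSTANCE_SHIFT	48	/* assumed, for illustration */
#define ENGINE_CLASS_SHIFT	61	/* assumed, for illustration */

static uint32_t ccid_upper(unsigned int class, unsigned int instance)
{
	uint32_t ccid = 0;

	ccid |= instance << (ENGINE_INSTANCE_SHIFT - 32);
	ccid |= class << (ENGINE_CLASS_SHIFT - 32);
	return ccid;
}

int main(void)
{
	printf("class 1, instance 1 -> upper dword %#x\n", ccid_upper(1, 1));
	return 0;
}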
engine, ring, inhibit); init_ppgtt_reg_state(regs, vm_alias(ce->vm)); - init_wa_bb_reg_state(regs, engine, - INTEL_GEN(engine->i915) >= 12 ? - GEN12_CTX_BB_PER_CTX_PTR : - CTX_BB_PER_CTX_PTR); + init_wa_bb_reg_state(regs, engine); __reset_stop_ring(regs, engine); } @@ -4623,29 +5190,18 @@ populate_lr_context(struct intel_context *ce, { bool inhibit = true; void *vaddr; - int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); - return ret; + drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); + return PTR_ERR(vaddr); } set_redzone(vaddr, engine); if (engine->default_state) { - void *defaults; - - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - ret = PTR_ERR(defaults); - goto err_unpin_ctx; - } - - memcpy(vaddr, defaults, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); + shmem_read(engine->default_state, 0, + vaddr, engine->context_size); __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -4657,14 +5213,12 @@ populate_lr_context(struct intel_context *ce, * The second page of the context object contains some registers which * must be set up prior to the first execution. */ - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, ce, engine, ring, inhibit); - ret = 0; -err_unpin_ctx: __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); - return ret; + return 0; } static int __execlists_context_alloc(struct intel_context *ce, @@ -4682,6 +5236,11 @@ static int __execlists_context_alloc(struct intel_context *ce, if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ + if (INTEL_GEN(engine->i915) == 12) { + ce->wa_bb_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; + } + ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(ctx_obj)) return PTR_ERR(ctx_obj); @@ -4721,7 +5280,8 @@ static int __execlists_context_alloc(struct intel_context *ce, ret = populate_lr_context(ce, ctx_obj, engine, ring); if (ret) { - DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); + drm_dbg(&engine->i915->drm, + "Failed to populate LRC: %d\n", ret); goto error_ring_free; } @@ -4774,6 +5334,8 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); intel_context_fini(&ve->context); + intel_engine_free_request_pool(&ve->base); + kfree(ve->bonds); kfree(ve); } @@ -4873,7 +5435,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = rq->execution_mask; if (unlikely(!mask)) { /* Invalid selection, submit to a random engine in error */ - i915_request_skip(rq, -ENODEV); + i915_request_set_error_once(rq, -ENODEV); mask = ve->siblings[0]->mask; } @@ -4887,7 +5449,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) static void virtual_submission_tasklet(unsigned long data) { struct virtual_engine * const ve = (struct virtual_engine *)data; - const int prio = ve->base.execlists.queue_priority_hint; + const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -4898,12 +5460,15 @@ static void virtual_submission_tasklet(unsigned long data) return; local_irq_disable(); - for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) { - struct intel_engine_cs *sibling = 
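In __execlists_context_alloc() the Gen12 per-context workaround batch page is simply appended after the context image: its page index is recorded before the allocation is grown by one page, and context_wa_bb_offset() later turns that index back into a byte offset. A minimal arithmetic sketch (the starting size is made up):

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned int context_size = 22 * PAGE_SIZE;	/* illustrative image size */
	unsigned int wa_bb_page, wa_bb_offset;

	wa_bb_page = context_size / PAGE_SIZE;		/* first page past the image */
	context_size += PAGE_SIZE;

	wa_bb_offset = PAGE_SIZE * wa_bb_page;		/* context_wa_bb_offset() */
	printf("wa bb at page %u, byte offset %#x, object %u bytes\n",
	       wa_bb_page, wa_bb_offset, context_size);
	return 0;
}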
ve->siblings[n]; + for (n = 0; n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]); struct ve_node * const node = &ve->nodes[sibling->id]; struct rb_node **parent, *rb; bool first; + if (!READ_ONCE(ve->request)) + break; /* already handled by a sibling's tasklet */ + if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { spin_lock(&sibling->active.lock); @@ -4954,10 +5519,8 @@ static void virtual_submission_tasklet(unsigned long data) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) { - sibling->execlists.queue_priority_hint = prio; + if (first && prio > sibling->execlists.queue_priority_hint) tasklet_hi_schedule(&sibling->execlists.tasklet); - } spin_unlock(&sibling->active.lock); } @@ -5283,11 +5846,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tE "); } - last = NULL; - count = 0; + if (execlists->switch_priority_hint != INT_MIN) + drm_printf(m, "\t\tSwitch priority hint: %d\n", + READ_ONCE(execlists->switch_priority_hint)); if (execlists->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - execlists->queue_priority_hint); + READ_ONCE(execlists->queue_priority_hint)); + + last = NULL; + count = 0; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index dfbc214e14f5..91fd8e452d9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -90,6 +90,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); #define LRC_PPHWSP_SZ (1) /* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) +#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE) /* Space within PPHWSP reserved to be used as scratch */ #define LRC_PPHWSP_SCRATCH 0x34 diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index d39b72590e40..93cb6c460508 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,14 +9,13 @@ #include <linux/types.h> -/* GEN8 to GEN11 Reg State Context */ +/* GEN8 to GEN12 Reg State Context */ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) -#define CTX_BB_PER_CTX_PTR (0x18 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_PDP3_UDW (0x24 + 1) #define CTX_PDP3_LDW (0x26 + 1) @@ -30,9 +29,6 @@ #define GEN9_CTX_RING_MI_MODE 0x54 -/* GEN12+ Reg State Context */ -#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) - #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index bef132709854..ab675d35030d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -7,6 +7,7 @@ #include <linux/pm_runtime.h> #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" @@ -112,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) struct intel_uncore *uncore = rc6_to_uncore(rc6); struct intel_engine_cs *engine; 
enum intel_engine_id id; - u32 rc6_mode; /* 2b: Program RC6 thresholds.*/ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { @@ -164,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) /* 3a: Enable RC6 */ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_RC6_ENABLE | - rc6_mode; + GEN6_RC_CTL_EI_MODE(1); /* * WaRsDisableCoarsePowerGating:skl,cnl @@ -245,16 +240,18 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL); if (IS_GEN(i915, 6) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n"); } else if (IS_GEN(i915, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + drm_dbg(&i915->drm, + "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); rc6vids &= 0xffff00; rc6vids |= GEN6_ENCODE_RC6_VID(450); ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + drm_err(&i915->drm, + "Couldn't fix incorrect rc6 voltage\n"); } } @@ -262,14 +259,15 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) static int chv_rc6_init(struct intel_rc6 *rc6) { struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); resource_size_t pctx_paddr, paddr; resource_size_t pctx_size = 32 * SZ_1K; u32 pcbr; pcbr = intel_uncore_read(uncore, VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n"); + paddr = i915->dsm.end + 1 - pctx_size; GEM_BUG_ON(paddr > U32_MAX); pctx_paddr = (paddr & ~4095); @@ -303,7 +301,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) goto out; } - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n"); /* * From the Gunit register HAS: @@ -315,14 +313,15 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) */ pctx = i915_gem_object_create_stolen(i915, pctx_size); if (IS_ERR(pctx)) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + drm_dbg(&i915->drm, + "not enough stolen space for PCTX, disabling\n"); return PTR_ERR(pctx); } - GEM_BUG_ON(range_overflows_t(u64, - i915->dsm.start, - pctx->stolen->start, - U32_MAX)); + GEM_BUG_ON(range_overflows_end_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); pctx_paddr = i915->dsm.start + pctx->stolen->start; intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); @@ -397,14 +396,14 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); rc_sw_target &= RC_SW_TARGET_STATE_MASK; rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; - DRM_DEBUG_DRIVER("BIOS enabled RC states: " + drm_dbg(&i915->drm, "BIOS enabled RC states: " "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), rc_sw_target); if (!(intel_uncore_read(uncore, RC6_LOCATION) & 
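When the BIOS leaves PCBR unprogrammed, chv_rc6_init() above carves the 32K power context out of the very top of stolen memory and aligns it down to 4K. A standalone sketch of that address computation; the stolen-memory end address is invented for the example.

#include <stdint.h>
#include <stdio.h>

#define SZ_1K 1024u

int main(void)
{
	uint64_t dsm_end = 0x8fffffff;		/* inclusive end of stolen (made up) */
	uint32_t pctx_size = 32 * SZ_1K;
	uint64_t paddr, pctx_paddr;

	paddr = dsm_end + 1 - pctx_size;	/* top of stolen, minus the context */
	pctx_paddr = paddr & ~4095ull;		/* 4K aligned as PCBR requires */

	printf("PCBR <- %#llx\n", (unsigned long long)pctx_paddr);
	return 0;
}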
RC6_CTX_IN_DRAM)) { - DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + drm_dbg(&i915->drm, "RC6 Base location not set properly.\n"); enable_rc6 = false; } @@ -416,7 +415,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; if (!(rc6_ctx_base >= i915->dsm_reserved.start && rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { - DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + drm_dbg(&i915->drm, "RC6 Base address not as expected.\n"); enable_rc6 = false; } @@ -424,24 +423,25 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { - DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + drm_dbg(&i915->drm, + "Engine Idle wait time not set properly.\n"); enable_rc6 = false; } if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { - DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + drm_dbg(&i915->drm, "Pushbus not setup properly.\n"); enable_rc6 = false; } if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { - DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + drm_dbg(&i915->drm, "GFX pause not setup properly.\n"); enable_rc6 = false; } if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { - DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + drm_dbg(&i915->drm, "GPM control not setup properly.\n"); enable_rc6 = false; } @@ -462,7 +462,7 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { - dev_notice(i915->drm.dev, + drm_notice(&i915->drm, "RC6 and powersaving disabled by BIOS\n"); return false; } @@ -494,7 +494,7 @@ static bool pctx_corrupted(struct intel_rc6 *rc6) if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO)) return false; - dev_notice(i915->drm.dev, + drm_notice(&i915->drm, "RC6 context corruption, disabling runtime power management\n"); return true; } @@ -602,6 +602,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6) void intel_rc6_park(struct intel_rc6 *rc6) { struct intel_uncore *uncore = rc6_to_uncore(rc6); + unsigned int target; if (!rc6->enabled) return; @@ -616,7 +617,14 @@ void intel_rc6_park(struct intel_rc6 *rc6) /* Turn off the HW timers and go directly to rc6 */ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); - set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); + + if (HAS_RC6pp(rc6_to_i915(rc6))) + target = 0x6; /* deepest rc6 */ + else if (HAS_RC6p(rc6_to_i915(rc6))) + target = 0x5; /* deep rc6 */ + else + target = 0x4; /* normal rc6 */ + set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); } void intel_rc6_disable(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 5954ecc3207f..f59e7875cc5e 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -102,7 +102,7 @@ static int render_state_setup(struct intel_renderstate *so, } if (rodata->reloc[reloc_index] != -1) { - DRM_ERROR("only %d relocs resolved\n", reloc_index); + drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index); goto err; } @@ -194,7 +194,7 @@ int intel_renderstate_init(struct intel_renderstate *so, err = 
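intel_rc6_park() now requests the deepest RC state the platform supports, 0x6 for RC6pp, 0x5 for RC6p, otherwise 0x4, shifted into GEN6_RC_STATE. A hedged sketch of the selection; the shift value is a placeholder for RC_SW_TARGET_STATE_SHIFT rather than the real field position.

#include <stdbool.h>
#include <stdio.h>

#define SW_TARGET_STATE_SHIFT 16	/* placeholder for RC_SW_TARGET_STATE_SHIFT */

static unsigned int park_target(bool has_rc6pp, bool has_rc6p)
{
	unsigned int target;

	if (has_rc6pp)
		target = 0x6;	/* deepest rc6 */
	else if (has_rc6p)
		target = 0x5;	/* deep rc6 */
	else
		target = 0x4;	/* normal rc6 */

	return target << SW_TARGET_STATE_SHIFT;
}

int main(void)
{
	printf("rc6-only platform writes %#x\n", park_target(false, false));
	printf("rc6pp platform writes %#x\n", park_target(true, true));
	return 0;
}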
i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_vma; + goto err_obj; err = render_state_setup(so, engine->i915); if (err) @@ -204,8 +204,6 @@ int intel_renderstate_init(struct intel_renderstate *so, err_unpin: i915_vma_unpin(so->vma); -err_vma: - i915_vma_close(so->vma); err_obj: i915_gem_object_put(obj); so->vma = NULL; @@ -221,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + if (err) + return err; + err = engine->emit_bb_start(rq, so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); @@ -235,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so, return err; } - i915_vma_lock(so->vma); - err = i915_request_await_object(rq, so->vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(so->vma, rq, 0); - i915_vma_unlock(so->vma); - - return err; + return 0; } void intel_renderstate_fini(struct intel_renderstate *so) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index aef6ab58d7d9..39070b514e65 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq) lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->context == hung_ctx) - i915_request_skip(rq, -EIO); + if (rq->context == hung_ctx) { + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); + } } static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) @@ -86,19 +88,18 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; + if (intel_context_is_closed(rq->context)) { + intel_context_set_banned(rq->context); + return true; + } + rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); if (ctx && !kref_get_unless_zero(&ctx->ref)) ctx = NULL; rcu_read_unlock(); if (!ctx) - return false; - - if (i915_gem_context_is_closed(ctx)) { - intel_context_set_banned(rq->context); - banned = true; - goto out; - } + return intel_context_is_banned(rq->context); atomic_inc(&ctx->guilty_count); @@ -108,7 +109,7 @@ static bool mark_guilty(struct i915_request *rq) goto out; } - dev_notice(ctx->i915->drm.dev, + drm_notice(&ctx->i915->drm, "%s context reset due to GPU hang\n", ctx->name); @@ -154,11 +155,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rcu_read_lock(); /* protect the GEM context */ if (guilty) { - i915_request_skip(rq, -EIO); + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); if (mark_guilty(rq)) engine_skip_context(rq); } else { - dma_fence_set_error(&rq->fence, -EAGAIN); + i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); } rcu_read_unlock(); @@ -753,7 +755,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(gt->ggtt); + intel_ggtt_restore_fences(gt->ggtt); return err; } @@ -785,7 +787,7 @@ static void nop_submit_request(struct i915_request *request) unsigned long flags; RQ_TRACE(request, "-EIO\n"); - dma_fence_set_error(&request->fence, -EIO); + i915_request_set_error_once(request, -EIO); spin_lock_irqsave(&engine->active.lock, flags); __i915_request_submit(request); @@ -1029,7 +1031,7 @@ void 
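Several hunks here swap dma_fence_set_error()/i915_request_skip() for i915_request_set_error_once(), which keeps the first error recorded on a request's fence instead of letting later callers overwrite it (the real helper is atomic; this sketch keeps only the first-error-wins idea and is an interpretation, not the driver implementation).

#include <stdio.h>

struct fence { int error; };

static void set_error_once(struct fence *f, int error)
{
	if (!f->error)		/* don't clobber an error already recorded */
		f->error = error;
}

int main(void)
{
	struct fence f = { 0 };

	set_error_once(&f, -5);		/* -EIO from the reset path */
	set_error_once(&f, -19);	/* -ENODEV later on: ignored */
	printf("fence error: %d\n", f.error);
	return 0;
}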
intel_gt_reset(struct intel_gt *gt, goto unlock; if (reason) - dev_notice(gt->i915->drm.dev, + drm_notice(>->i915->drm, "Resetting chip for %s\n", reason); atomic_inc(>->i915->gpu_error.reset_count); @@ -1037,7 +1039,7 @@ void intel_gt_reset(struct intel_gt *gt, if (!intel_has_gpu_reset(gt)) { if (i915_modparams.reset) - dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); + drm_err(>->i915->drm, "GPU reset not supported\n"); else drm_dbg(>->i915->drm, "GPU reset disabled\n"); goto error; @@ -1047,7 +1049,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_runtime_pm_disable_interrupts(gt->i915); if (do_reset(gt, stalled_mask)) { - dev_err(gt->i915->drm.dev, "Failed to reset chip\n"); + drm_err(>->i915->drm, "Failed to reset chip\n"); goto taint; } @@ -1109,7 +1111,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) /** * intel_engine_reset - reset GPU engine to recover from a hang * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no dev_notice() + * @msg: reason for GPU reset; or NULL for no drm_notice() * * Reset a specific GPU engine. Useful if a hang is detected. * Returns zero on successful reset or otherwise an error code. @@ -1134,7 +1136,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) reset_prepare_engine(engine); if (msg) - dev_notice(engine->i915->drm.dev, + drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); @@ -1379,7 +1381,7 @@ static void intel_wedge_me(struct work_struct *work) { struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); - dev_err(w->gt->i915->drm.dev, + drm_err(&w->gt->i915->drm, "%s timed out, cancelling all in-flight rendering.\n", w->name); intel_gt_set_wedged(w->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 5bdce24994aa..cc0ebca65167 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -88,6 +88,8 @@ static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) static inline void assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) { + unsigned int head = READ_ONCE(ring->head); + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); /* @@ -105,8 +107,7 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) * into the same cacheline as ring->head. */ #define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); + GEM_BUG_ON(cacheline(tail) == cacheline(head) && tail < head); #undef cacheline } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index f70b903a98bc..ca7286e58409 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -29,11 +29,10 @@ #include <linux/log2.h> -#include <drm/i915_drm.h> - #include "gem/i915_gem_context.h" #include "gen6_ppgtt.h" +#include "gen7_renderclear.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_context.h" @@ -43,6 +42,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "intel_workarounds.h" +#include "shmem_utils.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. 
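The assert_ring_tail_valid() hunk snapshots ring->head once with READ_ONCE() and then checks the longstanding restriction that the tail may only be numerically behind the head if the two do not share a cacheline. A standalone restatement of that predicate with two example offsets:

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u
#define round_down(x, y) ((x) & ~((y) - 1))

static int tail_valid(unsigned int head, unsigned int tail)
{
	return !(round_down(tail, CACHELINE_BYTES) ==
		 round_down(head, CACHELINE_BYTES) && tail < head);
}

int main(void)
{
	assert(tail_valid(0x100, 0x140));	/* different cachelines: ok */
	assert(!tail_valid(0x130, 0x110));	/* same cacheline, tail behind: bad */
	printf("assertions hold\n");
	return 0;
}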
@@ -578,8 +578,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) RING_INSTPM(engine->mmio_base), INSTPM_SYNC_FLUSH, 0, 1000)) - DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", - engine->name); + drm_err(&dev_priv->drm, + "%s: wait for SyncFlush to complete for TLB invalidation timed out\n", + engine->name); } static void ring_setup_status_page(struct intel_engine_cs *engine) @@ -602,8 +603,9 @@ static bool stop_ring(struct intel_engine_cs *engine) MODE_IDLE, MODE_IDLE, 1000)) { - DRM_ERROR("%s : timed out trying to stop ring\n", - engine->name); + drm_err(&dev_priv->drm, + "%s : timed out trying to stop ring\n", + engine->name); /* * Sometimes we observe that the idle flag is not @@ -662,22 +664,23 @@ static int xcs_resume(struct intel_engine_cs *engine) /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (!stop_ring(engine)) { /* G45 ring initialization often fails to reset head to zero */ - DRM_DEBUG_DRIVER("%s head not reset to zero " + drm_dbg(&dev_priv->drm, "%s head not reset to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_HEAD), + ENGINE_READ(engine, RING_TAIL), + ENGINE_READ(engine, RING_START)); + + if (!stop_ring(engine)) { + drm_err(&dev_priv->drm, + "failed to set %s head to zero " "ctl %08x head %08x tail %08x start %08x\n", engine->name, ENGINE_READ(engine, RING_CTL), ENGINE_READ(engine, RING_HEAD), ENGINE_READ(engine, RING_TAIL), ENGINE_READ(engine, RING_START)); - - if (!stop_ring(engine)) { - DRM_ERROR("failed to set %s head to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_HEAD), - ENGINE_READ(engine, RING_TAIL), - ENGINE_READ(engine, RING_START)); ret = -EIO; goto out; } @@ -720,7 +723,7 @@ static int xcs_resume(struct intel_engine_cs *engine) RING_CTL(engine->mmio_base), RING_VALID, RING_VALID, 50)) { - DRM_ERROR("%s initialization failed " + drm_err(&dev_priv->drm, "%s initialization failed " "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", engine->name, ENGINE_READ(engine, RING_CTL), @@ -897,9 +900,7 @@ static void reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->active.requests, sched.link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - + i915_request_set_error_once(request, -EIO); i915_request_mark_complete(request); } @@ -1241,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine) i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); if (engine->default_state) { - void *defaults, *vaddr; + void *vaddr; vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { @@ -1249,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine) goto err_obj; } - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(defaults)) { - err = PTR_ERR(defaults); - goto err_map; - } - - memcpy(vaddr, defaults, engine->context_size); - i915_gem_object_unpin_map(engine->default_state); + shmem_read(engine->default_state, 0, + vaddr, engine->context_size); i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -1271,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine) return vma; -err_map: - i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); return ERR_PTR(err); @@ -1360,7 +1352,9 @@ static int load_pd_dir(struct i915_request *rq, return rq->engine->emit_flush(rq, EMIT_FLUSH); } -static inline int mi_set_context(struct i915_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, + struct intel_context *ce, + u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1435,7 +1429,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->context->state) | flags; + *cs++ = i915_ggtt_offset(ce->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1550,13 +1544,56 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) return rq->engine->emit_flush(rq, EMIT_INVALIDATE); } +static int clear_residuals(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int ret; + + ret = switch_mm(rq, vm_alias(engine->kernel_context->vm)); + if (ret) + return ret; + + if (engine->kernel_context->state) { + ret = mi_set_context(rq, + engine->kernel_context, + MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT); + if (ret) + return ret; + } + + ret = engine->emit_bb_start(rq, + engine->wa_ctx.vma->node.start, 0, + 0); + if (ret) + return ret; + + ret = engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + return ret; + + /* Always invalidate before the next switch_mm() */ + return engine->emit_flush(rq, EMIT_INVALIDATE); +} + static int switch_context(struct i915_request *rq) { + struct intel_engine_cs *engine = rq->engine; struct intel_context *ce = rq->context; + void **residuals = NULL; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + if (engine->wa_ctx.vma && ce != engine->kernel_context) { + if (engine->wa_ctx.vma->private != ce) { + ret = clear_residuals(rq); + if (ret) + return ret; + + residuals = &engine->wa_ctx.vma->private; + } + } + ret = switch_mm(rq, vm_alias(ce->vm)); if (ret) return ret; @@ -1564,7 +1601,7 @@ static int switch_context(struct i915_request *rq) if (ce->state) { u32 flags; - GEM_BUG_ON(rq->engine->id != RCS0); + GEM_BUG_ON(engine->id != RCS0); /* For resource streamer on HSW+ and power context elsewhere */ BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); @@ -1576,7 +1613,7 @@ static int switch_context(struct i915_request *rq) 
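The clear_residuals()/switch_context() changes make the legacy ring backend remember, via the WA batch VMA's private pointer, which user context last left residual EU state behind: switching to a different user context first runs the clearing batch, then records the new owner (the real code defers recording until the request is past the point of no return, and takes references; both are elided in this control-flow sketch).

#include <stdio.h>

struct context { const char *name; };

static struct context *dirty_owner;	/* stands in for engine->wa_ctx.vma->private */

static void switch_to(struct context *ce, struct context *kernel_ctx)
{
	if (ce != kernel_ctx && dirty_owner != ce) {
		printf("clear residuals left by %s\n",
		       dirty_owner ? dirty_owner->name : "nobody");
		dirty_owner = ce;
	}
	printf("run %s\n", ce->name);
}

int main(void)
{
	struct context kernel = { "kernel" }, a = { "ctx A" }, b = { "ctx B" };

	switch_to(&a, &kernel);
	switch_to(&a, &kernel);	/* same owner: no clearing needed */
	switch_to(&b, &kernel);	/* different owner: clear first */
	return 0;
}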
else flags |= MI_RESTORE_INHIBIT; - ret = mi_set_context(rq, flags); + ret = mi_set_context(rq, ce, flags); if (ret) return ret; } @@ -1585,6 +1622,20 @@ static int switch_context(struct i915_request *rq) if (ret) return ret; + /* + * Now past the point of no return, this request _will_ be emitted. + * + * Or at least this preamble will be emitted, the request may be + * interrupted prior to submitting the user payload. If so, we + * still submit the "empty" request in order to preserve global + * state tracking such as this, our tracking of the current + * dirty context. + */ + if (residuals) { + intel_context_put(*residuals); + *residuals = intel_context_get(ce); + } + return 0; } @@ -1769,6 +1820,11 @@ static void ring_release(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); + if (engine->wa_ctx.vma) { + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + } + intel_ring_unpin(engine->legacy.ring); intel_ring_put(engine->legacy.ring); @@ -1916,6 +1972,64 @@ static void setup_vecs(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } +static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + return gen7_setup_clear_gpr_bb(engine, vma); +} + +static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int size; + int err; + + size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); + if (size <= 0) + return size; + + size = ALIGN(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(engine->i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + err = PTR_ERR(vma->private); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + goto err_private; + + err = i915_vma_sync(vma); + if (err) + goto err_unpin; + + err = gen7_ctx_switch_bb_setup(engine, vma); + if (err) + goto err_unpin; + + engine->wa_ctx.vma = vma; + return 0; + +err_unpin: + i915_vma_unpin(vma); +err_private: + intel_context_put(vma->private); +err_obj: + i915_gem_object_put(obj); + return err; +} + int intel_ring_submission_setup(struct intel_engine_cs *engine) { struct intel_timeline *timeline; @@ -1969,11 +2083,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + err = gen7_ctx_switch_bb_init(engine); + if (err) + goto err_ring_unpin; + } + /* Finally, take ownership and responsibility for cleanup! 
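gen7_ctx_switch_bb_init() uses a two-pass pattern: call the emitter with a NULL buffer to learn how much space the batch needs, allocate a page-aligned object of that size, then call it again to fill it in. A self-contained sketch of that shape (the 24-dword batch is a stand-in for whatever gen7_setup_clear_gpr_bb() actually emits):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

static int emit_clear_bb(uint32_t *batch)
{
	const int ndwords = 24;			/* pretend batch length */

	if (batch) {
		for (int i = 0; i < ndwords; i++)
			batch[i] = 0;		/* real commands would go here */
	}
	return ndwords * sizeof(uint32_t);	/* size in bytes */
}

int main(void)
{
	int size = emit_clear_bb(NULL);		/* probe pass */
	uint32_t *bb = calloc(1, ALIGN(size, PAGE_SIZE));

	emit_clear_bb(bb);			/* fill pass */
	printf("batch uses %d bytes of a %d byte object\n",
	       size, ALIGN(size, PAGE_SIZE));
	free(bb);
	return 0;
}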
*/ engine->release = ring_release; return 0; +err_ring_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err_timeline_unpin: @@ -1984,3 +2106,7 @@ err: intel_engine_cleanup_common(engine); return err; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring_submission.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 30ae29b30f11..2f59fc6df3c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -4,14 +4,19 @@ * Copyright © 2019 Intel Corporation */ +#include <drm/i915_drm.h> + #include "i915_drv.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_rps.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" +#define BUSY_MAX_EI 20u /* ms */ + /* * Lock protecting IPS related data structures */ @@ -42,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) intel_uncore_write_fw(uncore, reg, val); } +static void rps_timer(struct timer_list *t) +{ + struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_engine_cs *engine; + enum intel_engine_id id; + s64 max_busy[3] = {}; + ktime_t dt, last; + + for_each_engine(engine, rps_to_gt(rps), id) { + s64 busy; + int i; + + dt = intel_engine_get_busy_time(engine); + last = engine->stats.rps; + engine->stats.rps = dt; + + busy = ktime_to_ns(ktime_sub(dt, last)); + for (i = 0; i < ARRAY_SIZE(max_busy); i++) { + if (busy > max_busy[i]) + swap(busy, max_busy[i]); + } + } + + dt = ktime_get(); + last = rps->pm_timestamp; + rps->pm_timestamp = dt; + + if (intel_rps_is_active(rps)) { + s64 busy; + int i; + + dt = ktime_sub(dt, last); + + /* + * Our goal is to evaluate each engine independently, so we run + * at the lowest clocks required to sustain the heaviest + * workload. However, a task may be split into sequential + * dependent operations across a set of engines, such that + * the independent contributions do not account for high load, + * but overall the task is GPU bound. For example, consider + * video decode on vcs followed by colour post-processing + * on vecs, followed by general post-processing on rcs. + * Since multi-engines being active does imply a single + * continuous workload across all engines, we hedge our + * bets by only contributing a factor of the distributed + * load into our busyness calculation. 
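The per-interval sampling in rps_timer() keeps only the three busiest engines: each new busy delta bubbles into max_busy[], displacing smaller entries via swap(). A standalone version of that insertion with made-up per-engine samples:

#include <stdio.h>

static void swap_ll(long long *a, long long *b)
{
	long long t = *a; *a = *b; *b = t;
}

int main(void)
{
	long long max_busy[3] = { 0 };
	long long samples[] = { 120, 40, 300, 90, 10 };	/* per-engine busy ns */

	for (int e = 0; e < 5; e++) {
		long long busy = samples[e];

		for (int i = 0; i < 3; i++)
			if (busy > max_busy[i])
				swap_ll(&busy, &max_busy[i]);
	}
	printf("top three: %lld %lld %lld\n",
	       max_busy[0], max_busy[1], max_busy[2]);
	return 0;
}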
+ */ + busy = max_busy[0]; + for (i = 1; i < ARRAY_SIZE(max_busy); i++) { + if (!max_busy[i]) + break; + + busy += div_u64(max_busy[i], 1 << i); + } + GT_TRACE(rps_to_gt(rps), + "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", + busy, (int)div64_u64(100 * busy, dt), + max_busy[0], max_busy[1], max_busy[2], + rps->pm_interval); + + if (100 * busy > rps->power.up_threshold * dt && + rps->cur_freq < rps->max_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else if (100 * busy < rps->power.down_threshold * dt && + rps->cur_freq > rps->min_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else { + rps->last_adj = 0; + } + + mod_timer(&rps->timer, + jiffies + msecs_to_jiffies(rps->pm_interval)); + rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); + } +} + +static void rps_start_timer(struct intel_rps *rps) +{ + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + rps->pm_interval = 1; + mod_timer(&rps->timer, jiffies + 1); +} + +static void rps_stop_timer(struct intel_rps *rps) +{ + del_timer_sync(&rps->timer); + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + cancel_work_sync(&rps->work); +} + static u32 rps_pm_mask(struct intel_rps *rps, u8 val) { u32 mask = 0; @@ -69,21 +168,17 @@ static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - rps_reset_ei(rps); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", + rps->pm_events, rps_pm_mask(rps, rps->last_freq)); - if (IS_VALLEYVIEW(gt->i915)) - /* WaGsvRC0ResidencyMethod:vlv */ - rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; - else - rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); + rps_reset_ei(rps); spin_lock_irq(>->irq_lock); gen6_gt_pm_enable_irq(gt, rps->pm_events); spin_unlock_irq(>->irq_lock); - set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq)); + intel_uncore_write(gt->uncore, + GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); } static void gen6_rps_reset_interrupts(struct intel_rps *rps) @@ -115,9 +210,8 @@ static void rps_disable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - rps->pm_events = 0; - - set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); + intel_uncore_write(gt->uncore, + GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); spin_lock_irq(>->irq_lock); gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); @@ -134,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps) cancel_work_sync(&rps->work); rps_reset_interrupts(rps); + GT_TRACE(gt, "interrupts:off\n"); } static const struct cparams { @@ -180,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps) fmin = (rgvmodectl & MEMMODE_FMIN_MASK); fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT; - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); + drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); rps->min_freq = fmax; + rps->efficient_freq = fstart; rps->max_freq = fmin; - - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; } static unsigned long @@ -450,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps) if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); + drm_err(&uncore->i915->drm, + "stuck trying to change perf mode\n"); mdelay(1); 
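The hedged busyness figure described in the comment above counts the busiest engine in full, the second at 1/2 and the third at 1/4, and the result is compared against the up/down thresholds as a percentage of the elapsed interval. A small numeric walk-through with invented values (95%/85% match the thresholds set later in rps_set_power()):

#include <stdio.h>

int main(void)
{
	long long max_busy[3] = { 8000000, 6000000, 1000000 };	/* busy ns */
	long long dt = 10000000;				/* 10ms interval */
	int up_threshold = 95, down_threshold = 85;		/* percent */

	long long busy = max_busy[0];
	for (int i = 1; i < 3; i++) {
		if (!max_busy[i])
			break;
		busy += max_busy[i] >> i;	/* div_u64(max_busy[i], 1 << i) */
	}

	if (100 * busy > (long long)up_threshold * dt)
		printf("request upclock (weighted busy %lld of %lld ns)\n", busy, dt);
	else if (100 * busy < (long long)down_threshold * dt)
		printf("request downclock\n");
	else
		printf("hold frequency\n");
	return 0;
}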
gen5_rps_set(rps, rps->cur_freq); @@ -527,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val) static void rps_set_power(struct intel_rps *rps, int new_power) { - struct intel_uncore *uncore = rps_to_uncore(rps); - struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; @@ -537,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power) if (new_power == rps->power.mode) return; + threshold_up = 95; + threshold_down = 85; + /* Note the units here are not exactly 1us, but 1280ns. */ switch (new_power) { case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ ei_up = 16000; - threshold_up = 95; - - /* Downclock if less than 85% busy over 32ms */ ei_down = 32000; - threshold_down = 85; break; case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ ei_up = 13000; - threshold_up = 90; - - /* Downclock if less than 75% busy over 32ms */ ei_down = 32000; - threshold_down = 75; break; case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ ei_up = 10000; - threshold_up = 85; - - /* Downclock if less than 60% busy over 32ms */ ei_down = 32000; - threshold_down = 60; break; } /* When byt can survive without system hang with dynamic * sw freq adjustments, this restriction can be lifted. */ - if (IS_VALLEYVIEW(i915)) + if (IS_VALLEYVIEW(gt->i915)) goto skip_hw_write; - set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + GT_TRACE(gt, + "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", + new_power, threshold_up, ei_up, threshold_down, ei_down); + + set(uncore, GEN6_RP_UP_EI, + intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); set(uncore, GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); - set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_EI, + intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); set(uncore, GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); set(uncore, GEN6_RP_CONTROL, - (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + (INTEL_GEN(gt->i915) > 9 ? 
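The unit bookkeeping behind the new rps_set_power() writes: the evaluation interval is specified in microseconds, intel_gt_ns_to_pm_interval() wants nanoseconds, and the threshold register wants threshold% of that interval, which is why the diff passes ei_up * 1000 and ei_up * threshold_up * 10 respectively (threshold% of N microseconds is N * threshold * 10 nanoseconds). A tiny arithmetic check:

#include <stdio.h>

int main(void)
{
	unsigned int ei_up = 10000;	/* us, the HIGH_POWER upclock window */
	unsigned int threshold_up = 95;	/* percent */

	unsigned long long ei_ns = (unsigned long long)ei_up * 1000;
	unsigned long long thr_ns = (unsigned long long)ei_up * threshold_up * 10;

	printf("interval %llu ns, up threshold %llu ns (%.0f%%)\n",
	       ei_ns, thr_ns, 100.0 * thr_ns / ei_ns);
	return 0;
}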
0 : GEN6_RP_MEDIA_TURBO) | GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | @@ -640,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) { + GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive)); + mutex_lock(&rps->power.mutex); if (interactive) { - if (!rps->power.interactive++ && rps->active) + if (!rps->power.interactive++ && intel_rps_is_active(rps)) rps_set_power(rps, HIGH_POWER); } else { GEM_BUG_ON(!rps->power.interactive); @@ -667,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) GEN6_AGGRESSIVE_TURBO); set(uncore, GEN6_RPNSWREQ, swreq); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", + val, intel_gpu_freq(rps, val), swreq); + return 0; } @@ -679,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val) err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); vlv_punit_put(i915); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", + val, intel_gpu_freq(rps, val)); + return err; } @@ -709,25 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) void intel_rps_unpark(struct intel_rps *rps) { - u8 freq; - - if (!rps->enabled) + if (!intel_rps_is_enabled(rps)) return; + GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); + /* * Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ mutex_lock(&rps->lock); - rps->active = true; - freq = max(rps->cur_freq, rps->efficient_freq), - freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); - intel_rps_set(rps, freq); - rps->last_adj = 0; + + intel_rps_set_active(rps); + intel_rps_set(rps, + clamp(rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit)); + mutex_unlock(&rps->lock); - if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps->pm_iir = 0; + if (intel_rps_has_interrupts(rps)) rps_enable_interrupts(rps); + if (intel_rps_uses_timer(rps)) + rps_start_timer(rps); if (IS_GEN(rps_to_i915(rps), 5)) gen5_rps_update(rps); @@ -735,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps) void intel_rps_park(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); + int adj; - if (!rps->enabled) + if (!intel_rps_clear_active(rps)) return; - if (INTEL_GEN(i915) >= 6) + if (intel_rps_uses_timer(rps)) + rps_stop_timer(rps); + if (intel_rps_has_interrupts(rps)) rps_disable_interrupts(rps); - rps->active = false; if (rps->last_freq <= rps->idle_freq) return; @@ -763,14 +865,34 @@ void intel_rps_park(struct intel_rps *rps) intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); rps_set(rps, rps->idle_freq, false); intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); + + /* + * Since we will try and restart from the previously requested + * frequency on unparking, treat this idle point as a downclock + * interrupt and reduce the frequency for resume. If we park/unpark + * more frequently than the rps worker can run, we will not respond + * to any EI and never see a change in frequency. + * + * (Note we accommodate Cherryview's limitation of only using an + * even bin by applying it to all.) 
+ */ + adj = rps->last_adj; + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = -2; + rps->last_adj = adj; + rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); + + GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } void intel_rps_boost(struct i915_request *rq) { - struct intel_rps *rps = &rq->engine->gt->rps; + struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; unsigned long flags; - if (i915_request_signaled(rq) || !rps->active) + if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) return; /* Serializes with i915_request_retire() */ @@ -779,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq) !dma_fence_is_signaled_locked(&rq->fence)) { set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", + rq->fence.context, rq->fence.seqno); + if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); @@ -796,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); - if (rps->active) { + if (intel_rps_is_active(rps)) { err = rps_set(rps, val, true); if (err) return err; @@ -805,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) * Make sure we continue to get interrupts * until we hit the minimum or maximum frequencies. */ - if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + if (intel_rps_has_interrupts(rps)) { struct intel_uncore *uncore = rps_to_uncore(rps); set(uncore, @@ -873,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps) static bool rps_reset(struct intel_rps *rps) { + struct drm_i915_private *i915 = rps_to_i915(rps); + /* force a reset */ rps->power.mode = -1; rps->last_freq = -1; if (rps_set(rps, rps->min_freq, true)) { - DRM_ERROR("Failed to reset RPS to initial values\n"); + drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); return false; } @@ -889,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps) /* See the Gen9_GT_PM_Programming_Guide doc for the below */ static bool gen9_rps_enable(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; /* Program defaults and thresholds for RPS */ - if (IS_GEN(i915, 9)) + if (IS_GEN(gt->i915, 9)) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(rps->rp1_freq)); - /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(i915, 1000000)); - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -913,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - 100000000 / 128); /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -930,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + return rps_reset(rps); 
} @@ -1015,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + /* Setting Fixed Bias */ vlv_punit_get(i915); @@ -1029,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps) drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, "GPLL not enabled\n"); - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); return rps_reset(rps); } @@ -1113,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + vlv_punit_get(i915); /* Setting Fixed Bias */ @@ -1127,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps) drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, "GPLL not enabled\n"); - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); return rps_reset(rps); } @@ -1171,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips) return ips->gfx_power + state2; } +static bool has_busy_stats(struct intel_rps *rps) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, rps_to_gt(rps), id) { + if (!intel_engine_supports_stats(engine)) + return false; + } + + return true; +} + void intel_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + bool enabled = false; + + if (!HAS_RPS(i915)) + return; + + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - if (IS_CHERRYVIEW(i915)) - rps->enabled = chv_rps_enable(rps); + if (rps->max_freq <= rps->min_freq) + /* leave disabled, no room for dynamic reclocking */; + else if (IS_CHERRYVIEW(i915)) + enabled = chv_rps_enable(rps); else if (IS_VALLEYVIEW(i915)) - rps->enabled = vlv_rps_enable(rps); + enabled = vlv_rps_enable(rps); else if (INTEL_GEN(i915) >= 9) - rps->enabled = gen9_rps_enable(rps); + enabled = gen9_rps_enable(rps); else if (INTEL_GEN(i915) >= 8) - rps->enabled = gen8_rps_enable(rps); + enabled = gen8_rps_enable(rps); else if (INTEL_GEN(i915) >= 6) - rps->enabled = gen6_rps_enable(rps); + enabled = gen6_rps_enable(rps); else if (IS_IRONLAKE_M(i915)) - rps->enabled = gen5_rps_enable(rps); + enabled = gen5_rps_enable(rps); + else + MISSING_CASE(INTEL_GEN(i915)); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - if (!rps->enabled) + if (!enabled) return; - drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq); + GT_TRACE(rps_to_gt(rps), + "min:%x, max:%x, freq:[%d, %d]\n", + rps->min_freq, rps->max_freq, + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->max_freq)); + + GEM_BUG_ON(rps->max_freq < rps->min_freq); + GEM_BUG_ON(rps->idle_freq > rps->max_freq); + + GEM_BUG_ON(rps->efficient_freq < rps->min_freq); + GEM_BUG_ON(rps->efficient_freq > rps->max_freq); - drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq); + if (has_busy_stats(rps)) + 
intel_rps_set_timer(rps); + else if (INTEL_GEN(i915) >= 6) + intel_rps_set_interrupts(rps); + else + /* Ironlake currently uses intel_ips.ko */ {} + + intel_rps_set_enabled(rps); } static void gen6_rps_disable(struct intel_rps *rps) @@ -1209,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - rps->enabled = false; + intel_rps_clear_enabled(rps); + intel_rps_clear_interrupts(rps); + intel_rps_clear_timer(rps); if (INTEL_GEN(i915) >= 6) gen6_rps_disable(rps); @@ -1285,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps) CCK_GPLL_CLOCK_CONTROL, i915->czclk_freq); - DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); + drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", + rps->gpll_ref_freq); } static void vlv_rps_init(struct intel_rps *rps) @@ -1313,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps) i915->mem_freq = 1333; break; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); rps->max_freq = vlv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->max_freq), - rps->max_freq); + drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), rps->max_freq); rps->efficient_freq = vlv_rps_rpe_freq(rps); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->efficient_freq), - rps->efficient_freq); + drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); rps->rp1_freq = vlv_rps_guar_freq(rps); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->rp1_freq), - rps->rp1_freq); + drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); rps->min_freq = vlv_rps_min_freq(rps); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->min_freq), - rps->min_freq); + drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), rps->min_freq); vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT) | @@ -1364,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps) i915->mem_freq = 1600; break; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); rps->max_freq = chv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->max_freq), - rps->max_freq); + drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), rps->max_freq); rps->efficient_freq = chv_rps_rpe_freq(rps); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->efficient_freq), - rps->efficient_freq); + drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); rps->rp1_freq = chv_rps_guar_freq(rps); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->rp1_freq), - rps->rp1_freq); + drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); rps->min_freq = chv_rps_min_freq(rps); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->min_freq), - rps->min_freq); + drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), rps->min_freq); vlv_iosf_sb_put(i915, 
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1448,20 +1615,25 @@ static void rps_work(struct work_struct *work)
{
struct intel_rps *rps = container_of(work, typeof(*rps), work);
struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
bool client_boost = false;
int new_freq, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
- if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ if (!pm_iir && !client_boost)
goto out;
mutex_lock(&rps->lock);
+ if (!intel_rps_is_active(rps)) {
+ mutex_unlock(&rps->lock);
+ return;
+ }
pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
@@ -1471,6 +1643,12 @@ static void rps_work(struct work_struct *work)
max = rps->max_freq_softlimit;
if (client_boost)
max = rps->max_freq;
+
+ GT_TRACE(gt,
+ "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
+ pm_iir, yesno(client_boost),
+ adj, new_freq, min, max);
+
if (client_boost && new_freq < rps->boost_freq) {
new_freq = rps->boost_freq;
adj = 0;
@@ -1502,30 +1680,18 @@ static void rps_work(struct work_struct *work)
adj = 0;
}
- rps->last_adj = adj;
- /*
- * Limit deboosting and boosting to keep ourselves at the extremes
- * when in the respective power modes (i.e. slowly decrease frequencies
- * while in the HIGH_POWER zone and slowly increase frequencies while
- * in the LOW_POWER zone). On idle, we will hit the timeout and drop
- * to the next level quickly, and conversely if busy we expect to
- * hit a waitboost and rapidly switch into max power.
- */
- if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
- (adj > 0 && rps->power.mode == LOW_POWER))
- rps->last_adj = 0;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
+ * sysfs frequency limits may have snuck in while
+ * servicing the interrupt
*/
new_freq += adj;
new_freq = clamp_t(int, new_freq, min, max);
if (intel_rps_set(rps, new_freq)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
+ drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
+ adj = 0;
}
+ rps->last_adj = adj;
mutex_unlock(&rps->lock);
@@ -1545,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
if (unlikely(!events))
return;
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1554,11 +1722,17 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
struct intel_gt *gt = rps_to_gt(rps);
+ u32 events;
- if (pm_iir & rps->pm_events) {
+ events = pm_iir & rps->pm_events;
+ if (events) {
spin_lock(&gt->irq_lock);
- gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
- rps->pm_iir |= pm_iir & rps->pm_events;
+
+ GT_TRACE(gt, "irq events:%x\n", events);
+
+ gen6_gt_pm_mask_irq(gt, events);
+ rps->pm_iir |= events;
+
schedule_work(&rps->work);
spin_unlock(&gt->irq_lock);
}
@@ -1613,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps)
mutex_init(&rps->power.mutex);
INIT_WORK(&rps->work, rps_work);
+ timer_setup(&rps->timer, rps_timer, 0);
atomic_set(&rps->num_waiters, 0);
}
@@ -1641,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps)
sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, &params, NULL);
if (params & BIT(31)) { /* OC supported */
- DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
-
(rps->max_freq & 0xff) * 50, - (params & 0xff) * 50); + drm_dbg(&i915->drm, + "Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); rps->max_freq = params & 0xff; } } @@ -1651,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps) /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; + + /* Start in the middle, from here we will autotune based on workload */ + rps->cur_freq = rps->efficient_freq; rps->pm_intrmsk_mbz = 0; @@ -1668,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } +void intel_rps_sanitize(struct intel_rps *rps) +{ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_disable_interrupts(rps); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -1695,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps) freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else { - freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1); } return intel_rps_get_cagf(rps, freq); @@ -1703,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps) u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { - struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; intel_wakeref_t wakeref; u32 freq = 0; @@ -1903,3 +2087,7 @@ bool i915_gpu_turbo_disable(void) return ret; } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_rps.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index dfa98194f3b2..8d3c9d663662 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -13,6 +13,7 @@ struct i915_request; void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); +void intel_rps_sanitize(struct intel_rps *rps); void intel_rps_driver_register(struct intel_rps *rps); void intel_rps_driver_unregister(struct intel_rps *rps); @@ -36,4 +37,64 @@ void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +static inline bool intel_rps_is_enabled(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline void intel_rps_set_enabled(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline void intel_rps_clear_enabled(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_ENABLED, &rps->flags); +} + +static inline bool intel_rps_is_active(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline void intel_rps_set_active(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline bool intel_rps_clear_active(struct intel_rps *rps) +{ + return test_and_clear_bit(INTEL_RPS_ACTIVE, &rps->flags); +} + +static inline bool intel_rps_has_interrupts(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_INTERRUPTS, &rps->flags); +} + +static inline void intel_rps_set_interrupts(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_INTERRUPTS, &rps->flags); +} + +static inline void intel_rps_clear_interrupts(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_INTERRUPTS, 
&rps->flags); +} + +static inline bool intel_rps_uses_timer(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_set_timer(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_clear_timer(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_TIMER, &rps->flags); +} + #endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index c2e279154bd5..38083f0402d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -31,6 +31,13 @@ struct intel_rps_ei { u32 media_c0; }; +enum { + INTEL_RPS_ENABLED = 0, + INTEL_RPS_ACTIVE, + INTEL_RPS_INTERRUPTS, + INTEL_RPS_TIMER, +}; + struct intel_rps { struct mutex lock; /* protects enabling and the worker */ @@ -38,9 +45,12 @@ struct intel_rps { * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock */ + struct timer_list timer; struct work_struct work; - bool enabled; - bool active; + unsigned long flags; + + ktime_t pm_timestamp; + u32 pm_interval; u32 pm_iir; /* PM interrupt bits that should never be masked */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 74f793423231..d173271c7397 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, { const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; u8 slices, subslices; u32 rpcs = 0; @@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, /* * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. + * on the system. Use the configuration pinned by i915/perf. */ - if (!i915->perf.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_sseu_from_device_info(sseu); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. 
- */
- ctx_sseu.subslice_mask =
- ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
- ctx_sseu.slice_mask = 0x1;
- }
- }
+ if (i915->perf.exclusive_stream)
+ req_sseu = &i915->perf.sseu;
- slices = hweight8(ctx_sseu.slice_mask);
- subslices = hweight8(ctx_sseu.subslice_mask);
+ slices = hweight8(req_sseu->slice_mask);
+ subslices = hweight8(req_sseu->subslice_mask);
/*
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
@@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
if (sseu->has_eu_pg) {
u32 val;
- val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
val &= GEN8_RPCS_EU_MIN_MASK;
rpcs |= val;
- val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
val &= GEN8_RPCS_EU_MAX_MASK;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 54e1e55f3c81..4546284fede1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -119,6 +119,15 @@ static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
+static void __rcu_cacheline_free(struct rcu_head *rcu)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(rcu, typeof(*cl), rcu);
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
GEM_BUG_ON(!i915_active_is_idle(&cl->active));
@@ -127,8 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
i915_vma_put(cl->hwsp->vma);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
- i915_active_fini(&cl->active);
- kfree_rcu(cl, rcu);
+ call_rcu(&cl->rcu, __rcu_cacheline_free);
}
__i915_active_call
@@ -192,16 +200,20 @@ static void cacheline_release(struct intel_timeline_cacheline *cl)
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
+ if (!i915_active_acquire_if_busy(&cl->active)) {
+ __idle_cacheline_free(cl);
+ return;
+ }
+
GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
- if (i915_active_is_idle(&cl->active))
- __idle_cacheline_free(cl);
+ i915_active_release(&cl->active);
}
-int intel_timeline_init(struct intel_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
+static int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
{
void *vaddr;
@@ -268,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timeline_fini(struct intel_timeline *timeline)
+static void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
@@ -325,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
return 0;
}
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+ /* Must be pinned to be writable, and no requests in flight.
*/ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); +} + void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; @@ -353,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl) return; spin_lock(&timelines->lock); - if (!atomic_fetch_inc(&tl->active_count)) + if (!atomic_fetch_inc(&tl->active_count)) { + /* + * The HWSP is volatile, and may have been lost while inactive, + * e.g. across suspend/resume. Be paranoid, and ensure that + * the HWSP value matches our seqno so we don't proclaim + * the next request as already complete. + */ + intel_timeline_reset_seqno(tl); list_add_tail(&tl->link, &timelines->active_list); + } spin_unlock(&timelines->lock); } @@ -517,6 +544,8 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); + if (i915_request_completed(from)) /* confirm cacheline is valid */ + goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! */ if (unlikely(i915_request_completed(from))) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f5b7eade3809..4298b9ac7327 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,11 +31,6 @@ #include "i915_syncmap.h" #include "gt/intel_timeline_types.h" -int intel_timeline_init(struct intel_timeline *tl, - struct intel_gt *gt, - struct i915_vma *hwsp); -void intel_timeline_fini(struct intel_timeline *tl); - struct intel_timeline * intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); @@ -84,6 +79,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, void intel_timeline_exit(struct intel_timeline *tl); void intel_timeline_unpin(struct intel_timeline *tl); +void intel_timeline_reset_seqno(const struct intel_timeline *tl); + int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *until, u32 *hwsp_offset); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 887e0dc701f7..90a2b9e399b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - /* WaForceContextSaveRestoreNonCoherent:cnl */ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - /* WaPushConstantDereferenceHoldDisable:cnl */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); @@ -575,29 +564,46 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* allow headerless messages for preemptible GPGPU context */ WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + + /* Wa_1604278689:icl,ehl */ + 
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); + wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER, + 0, /* write-only register; skip validation */ + 0xFFFFFFFF); + + /* Wa_1406306137:icl,ehl */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - u32 val; - - /* Wa_1409142259:tgl */ + /* + * Wa_1409142259:tgl + * Wa_1409347922:tgl + * Wa_1409252684:tgl + * Wa_1409217633:tgl + * Wa_1409207793:tgl + * Wa_1409178076:tgl + * Wa_1408979724:tgl + */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - /* Wa_1604555607:tgl */ - val = intel_uncore_read(engine->uncore, FF_MODE2); - val &= ~FF_MODE2_TDS_TIMER_MASK; - val |= FF_MODE2_TDS_TIMER_128; /* - * FIXME: FF_MODE2 register is not readable till TGL B0. We can - * enable verification of WA from the later steppings, which enables - * the read of FF_MODE2. + * Wa_1604555607:gen12 and Wa_1608008084:gen12 + * FF_MODE2 register will return the wrong value when read. The default + * value for this register is zero for all fields and there are no bit + * masks. So instead of doing a RMW we should just write the TDS timer + * value for Wa_1604555607. */ - wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val, - IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 : - FF_MODE2_TDS_TIMER_MASK); + wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, 0); + + /* WaDisableGPGPUMidThreadPreemption:tgl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); } static void @@ -820,7 +826,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & GEN10_L3BANK_MASK; - DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse); + drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); } else { l3_en = ~0; @@ -829,7 +835,8 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) slice = fls(sseu->slice_mask) - 1; subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); if (!subslice) { - DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", + drm_warn(&i915->drm, + "No common index found between subslice mask %x and L3 bank mask %x!\n", intel_sseu_get_subslices(sseu, slice), l3_en); subslice = fls(l3_en); drm_WARN_ON(&i915->drm, !subslice); @@ -844,7 +851,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; } - DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr); + drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); } @@ -854,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { wa_init_mcr(i915, wal); - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); - /* WaInPlaceDecompressionHang:cnl */ wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, @@ -903,11 +904,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) SLICE_UNIT_LEVEL_CLKGATE, MSCUNIT_CLKGATE_DIS); - /* Wa_1406680159:icl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - /* Wa_1406838659:icl (pre-prod) */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 
wa_write_or(wal,
@@ -921,22 +917,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ wa_init_mcr(i915, wal);
+
/* Wa_1409420604:tgl */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SUBSLICE_UNIT_LEVEL_CLKGATE2,
CPSSUNIT_CLKGATE_DIS);
- /* Wa_1409180338:tgl */
+ /* Wa_1607087056:tgl also known as BUG:1409180338 */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
@@ -1251,6 +1252,7 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
case RENDER_CLASS:
/*
* WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ * Wa_1408556865:tgl
*
* This covers 4 registers which are next to one another :
* - PS_INVOCATION_COUNT
@@ -1264,6 +1266,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
/* Wa_1808121037:tgl */
whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
+
+ /* Wa_1806527549:tgl */
+ whitelist_reg(w, HIZ_CHICKEN);
break;
default:
break;
@@ -1330,19 +1335,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
- /* Wa_1606700617:tgl */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
-
- /* Wa_1607138336:tgl */
+ /*
+ * Wa_1607138336:tgl
+ * Wa_1607063988:tgl
+ */
wa_write_or(wal,
GEN9_CTX_PREEMPT_REG,
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
- /* Wa_1607030317:tgl */
- /* Wa_1607186500:tgl */
- /* Wa_1607297627:tgl */
+ /*
+ * Wa_1607030317:tgl
+ * Wa_1607186500:tgl
+ * Wa_1607297627:tgl there are 3 entries for this WA on BSpec, 2
+ * of them say it is fixed on B0, the other one says it is
+ * permanent
+ */
wa_masked_en(wal,
GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
@@ -1361,10 +1368,29 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+ /* Wa_1408615072:tgl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ VSUNIT_CLKGATE_DIS_TGL);
+ }
+
+ if (IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+
+ /* Wa_1409804808:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+ GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+
+ /* Wa_1606700617:tgl */
wa_masked_en(wal,
- GEN7_ROW_CHICKEN2,
- GEN12_DISABLE_EARLY_READ);
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_GEN(i915, 11)) {
@@ -1430,10 +1456,38 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN11_SCRATCH2,
GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
0);
+
+ /* WaEnable32PlaneMode:icl */
+ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
+ GEN11_ENABLE_32_PLANE_MODE);
+
+ /*
+ * Wa_1408615072:icl,ehl (vsunit)
+ * Wa_1407596294:icl,ehl (hsunit)
+ */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+ /* Wa_1407352427:icl,ehl */
+ wa_write_or(wal,
UNSLICE_UNIT_LEVEL_CLKGATE2, + PSDUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl,ehl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + + /* + * Wa_1408767742:icl[a2..forever],ehl[all] + * Wa_1605460711:icl[a0..c0] + */ + wa_write_or(wal, + GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); } - if (IS_GEN_RANGE(i915, 9, 11)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ + if (IS_GEN_RANGE(i915, 9, 12)) { + /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL); @@ -1600,15 +1654,34 @@ err_obj: return ERR_PTR(err); } +static const struct { + u32 start; + u32 end; +} mcr_ranges_gen8[] = { + { .start = 0x5500, .end = 0x55ff }, + { .start = 0x7000, .end = 0x7fff }, + { .start = 0x9400, .end = 0x97ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xe000, .end = 0xe7ff }, + {}, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { + int i; + + if (INTEL_GEN(i915) < 8) + return false; + /* - * Registers in this range are affected by the MCR selector + * Registers in these ranges are affected by the MCR selector * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) - return true; + for (i = 0; mcr_ranges_gen8[i].start; i++) + if (offset >= mcr_ranges_gen8[i].start && + offset <= mcr_ranges_gen8[i].end) + return true; return false; } diff --git a/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c new file mode 100644 index 000000000000..610ca7687735 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + * + * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC + */ + +static const u32 ivb_clear_kernel[] = { + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000040, 0x20280c21, 0x00000028, 0x00000001, + 0x01000010, 0x20000c20, 0x0000002c, 0x00000000, + 0x00010220, 0x34001c00, 0x00001400, 0x0000002c, + 0x00600001, 0x20600061, 0x00000000, 0x00000000, + 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c, + 0x00000005, 0x20601ca5, 0x00000060, 0x00000001, + 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d, + 0x00000005, 0x20641ca5, 0x00000064, 0x00000003, + 0x00000041, 0x207424a5, 0x00000064, 0x00000034, + 0x00000040, 0x206014a5, 0x00000060, 0x00000074, + 0x00000008, 0x20681c85, 0x00000e00, 0x00000008, + 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f, + 0x00000041, 0x20701ca5, 0x00000060, 0x00000010, + 0x00000040, 0x206814a5, 0x00000068, 0x00000070, + 0x00600001, 0x20a00061, 0x00000000, 0x00000000, + 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007, + 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004, + 0x00600001, 0x20800021, 0x008d0000, 0x00000000, + 0x00000001, 0x20800021, 0x0000006c, 0x00000000, + 0x00000001, 0x20840021, 0x00000068, 0x00000000, + 0x00000001, 0x20880061, 0x00000000, 0x00000003, + 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001, + 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001, + 0x02000040, 0x20281c21, 0x00000028, 0xffffffff, + 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc, + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000001, 0x220010e4, 0x00000000, 0x00000000, + 0x00000001, 0x220831ec, 0x00000000, 0x007f007f, + 
0x00600001, 0x20400021, 0x008d0000, 0x00000000, + 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000, + 0x00200001, 0x20400121, 0x00450020, 0x00000000, + 0x00000001, 0x20480061, 0x00000000, 0x000f000f, + 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef, + 0x00800001, 0x20600061, 0x00000000, 0x00000000, + 0x00800001, 0x20800061, 0x00000000, 0x00000000, + 0x00800001, 0x20a00061, 0x00000000, 0x00000000, + 0x00800001, 0x20c00061, 0x00000000, 0x00000000, + 0x00800001, 0x20e00061, 0x00000000, 0x00000000, + 0x00800001, 0x21000061, 0x00000000, 0x00000000, + 0x00800001, 0x21200061, 0x00000000, 0x00000000, + 0x00800001, 0x21400061, 0x00000000, 0x00000000, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x00000040, 0x20402d21, 0x00000020, 0x00100010, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff, + 0x00800001, 0xa0000109, 0x00000602, 0x00000000, + 0x00000040, 0x22001c84, 0x00000200, 0x00000020, + 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8, + 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010, +}; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 5633515c12e9..b8dd3cbc8696 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -28,7 +28,6 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "mock_engine.h" #include "selftests/mock_request.h" @@ -244,9 +243,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->active.requests, sched.link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - + i915_request_set_error_once(request, -EIO); i915_request_mark_complete(request); } @@ -330,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); ce = create_kernel_context(engine); if (IS_ERR(ce)) diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index e874dfaa5316..52af1cee9a94 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq) /* Opencode i915_request_add() so we can keep the timeline locked. */ __i915_request_commit(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_queue(rq, NULL); timeout = i915_request_wait(rq, 0, HZ / 10); @@ -154,10 +155,7 @@ static int live_context_size(void *arg) */ for_each_engine(engine, gt, id) { - struct { - struct drm_i915_gem_object *state; - void *pinned; - } saved; + struct file *saved; if (!engine->context_size) continue; @@ -171,8 +169,7 @@ static int live_context_size(void *arg) * active state is sufficient, we are only checking that we * don't use more than we planned. 
*/ - saved.state = fetch_and_zero(&engine->default_state); - saved.pinned = fetch_and_zero(&engine->pinned_default_state); + saved = fetch_and_zero(&engine->default_state); /* Overlaps with the execlists redzone */ engine->context_size += I915_GTT_PAGE_SIZE; @@ -181,8 +178,7 @@ static int live_context_size(void *arg) engine->context_size -= I915_GTT_PAGE_SIZE; - engine->pinned_default_state = saved.pinned; - engine->default_state = saved.state; + engine->default_state = saved; intel_engine_pm_put(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 43d4d589749f..697114dd1f47 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -142,6 +142,24 @@ out: return err; } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int live_idle_flush(void *arg) { struct intel_gt *gt = arg; @@ -152,9 +170,11 @@ static int live_idle_flush(void *arg) /* Check that we can flush the idle barriers */ for_each_engine(engine, gt, id) { - intel_engine_pm_get(engine); + unsigned long heartbeat; + + engine_heartbeat_disable(engine, &heartbeat); err = __live_idle_pulse(engine, intel_engine_flush_barriers); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err) break; } @@ -172,9 +192,11 @@ static int live_idle_pulse(void *arg) /* Check that heartbeat pulses flush the idle barriers */ for_each_engine(engine, gt, id) { - intel_engine_pm_get(engine); + unsigned long heartbeat; + + engine_heartbeat_disable(engine, &heartbeat); err = __live_idle_pulse(engine, intel_engine_pulse); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err && err != -ENODEV) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 09ff8e4f88af..242181a5214c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -7,6 +7,7 @@ #include "selftest_llc.h" #include "selftest_rc6.h" +#include "selftest_rps.h" static int live_gt_resume(void *arg) { @@ -52,6 +53,13 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_rc6_manual), + SUBTEST(live_rps_clock_interval), + SUBTEST(live_rps_control), + SUBTEST(live_rps_frequency_cs), + SUBTEST(live_rps_frequency_srm), + SUBTEST(live_rps_power), + SUBTEST(live_rps_interrupt), + SUBTEST(live_rps_dynamic), SUBTEST(live_gt_resume), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 3e5e6c86e843..2b2efff6e19d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) cancel_rq: if (err) { - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); i915_request_add(rq); } unpin_hws: @@ -1640,7 +1640,7 @@ static int igt_reset_engines_atomic(void *arg) if (!intel_has_reset_engine(gt)) return 0; - if (USES_GUC_SUBMISSION(gt->i915)) + if 
(intel_uc_uses_guc_submission(&gt->uc))
return 0;
igt_global_reset_lock(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 49b93cda04ca..824f99c4cc7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -21,7 +21,8 @@
#include "gem/selftests/mock_context.h"
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
@@ -68,26 +69,84 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
engine->props.heartbeat_interval_ms = saved;
}
+static bool is_active(struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ if (i915_request_on_hold(rq))
+ return true;
+
+ if (i915_request_started(rq))
+ return true;
+
+ return false;
+}
+
static int wait_for_submit(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
timeout += jiffies;
do {
+ bool done = time_after(jiffies, timeout);
+
+ if (i915_request_completed(rq)) /* that was quick! */
+ return 0;
+
+ /* Wait until the HW has acknowledged the submission (or err) */
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
+ return 0;
+
+ if (done)
+ return -ETIME;
+
+ cond_resched();
+ } while (1);
+}
+
+static int wait_for_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+
+ do {
cond_resched();
intel_engine_flush_submission(engine);
if (READ_ONCE(engine->execlists.pending[0]))
continue;
- if (i915_request_is_active(rq))
- return 0;
+ if (i915_request_completed(rq))
+ break;
- if (i915_request_started(rq)) /* that was quick! */
- return 0;
+ if (READ_ONCE(rq->fence.error))
+ break;
} while (time_before(jiffies, timeout));
- return -ETIME;
+ flush_scheduled_work();
+
+ if (rq->fence.error != -EIO) {
+ pr_err("%s: hanging request %llx:%lld not reset\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -EINVAL;
+ }
+
+ /* Give the request a jiffie to complete after flushing the worker */
+ if (i915_request_wait(rq, 0,
+ max(0l, (long)(timeout - jiffies)) + 1) < 0) {
+ pr_err("%s: hanging request %llx:%lld did not complete\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -ETIME;
+ }
+
+ return 0;
}
static int live_sanitycheck(void *arg)
@@ -591,9 +650,9 @@ static int live_error_interrupt(void *arg)
error_repr(p->error[i]));
if (!i915_request_started(client[i])) {
- pr_debug("%s: %s request not stated!\n",
- engine->name,
- error_repr(p->error[i]));
+ pr_err("%s: %s request not started!\n",
+ engine->name,
+ error_repr(p->error[i]));
err = -ETIME;
goto out;
}
@@ -601,9 +660,10 @@ static int live_error_interrupt(void *arg)
/* Kick the tasklet to process the error */
intel_engine_flush_submission(engine);
if (client[i]->fence.error != p->error[i]) {
- pr_err("%s: %s request completed with wrong error code: %d\n",
+ pr_err("%s: %s request (%s) with wrong error code: %d\n",
engine->name,
error_repr(p->error[i]),
+ i915_request_completed(client[i]) ?
"completed" : "running", client[i]->fence.error); err = -EINVAL; goto out; @@ -886,7 +946,7 @@ create_rewinder(struct intel_context *ce, goto err; } - cs = intel_ring_begin(rq, 10); + cs = intel_ring_begin(rq, 14); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err; @@ -898,8 +958,8 @@ create_rewinder(struct intel_context *ce, *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = idx; *cs++ = offset; *cs++ = 0; @@ -908,6 +968,11 @@ create_rewinder(struct intel_context *ce, *cs++ = offset + idx * sizeof(u32); *cs++ = 0; + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = idx + 1; + intel_ring_advance(rq, cs); rq->sched.attr.priority = I915_PRIORITY_MASK; @@ -941,7 +1006,7 @@ static int live_timeslice_rewind(void *arg) for_each_engine(engine, gt, id) { enum { A1, A2, B1 }; - enum { X = 1, Y, Z }; + enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; unsigned long heartbeat; @@ -974,13 +1039,13 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[0] = create_rewinder(ce, NULL, slot, 1); + rq[0] = create_rewinder(ce, NULL, slot, X); if (IS_ERR(rq[0])) { intel_context_put(ce); goto err; } - rq[1] = create_rewinder(ce, NULL, slot, 2); + rq[1] = create_rewinder(ce, NULL, slot, Y); intel_context_put(ce); if (IS_ERR(rq[1])) goto err; @@ -998,7 +1063,7 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[2] = create_rewinder(ce, rq[0], slot, 3); + rq[2] = create_rewinder(ce, rq[0], slot, Z); intel_context_put(ce); if (IS_ERR(rq[2])) goto err; @@ -1009,18 +1074,14 @@ static int live_timeslice_rewind(void *arg) engine->name); goto err; } - GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ - GEM_BUG_ON(!i915_request_is_active(rq[A1])); - GEM_BUG_ON(!i915_request_is_active(rq[A2])); - GEM_BUG_ON(!i915_request_is_active(rq[B1])); - - /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); - intel_engine_flush_submission(engine); - + if (i915_request_is_active(rq[A2])) { /* semaphore yielded! 
*/
+ /* Wait for the timeslice to kick in */
+ del_timer(&engine->execlists.timer);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ intel_engine_flush_submission(engine);
+ }
/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
@@ -1185,8 +1246,14 @@ static int live_timeslice_queue(void *arg)
if (err)
goto err_rq;
- intel_engine_flush_submission(engine);
+ /* Wait until we ack the release_queue and start timeslicing */
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (READ_ONCE(engine->execlists.pending[0]));
+ if (!READ_ONCE(engine->execlists.timer.expires) &&
+ execlists_active(&engine->execlists) == rq &&
!i915_request_completed(rq)) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -1805,14 +1872,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -1870,10 +1932,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
goto out;
igt_spinner_end(&arg->a.spin);
- if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[1], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != 0) {
pr_err("Normal inflight0 request did not complete\n");
@@ -1953,10 +2014,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[2], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != -EIO) {
pr_err("Cancelled inflight0 request did not report -EIO\n");
@@ -1994,6 +2054,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
return 0;
+ if (!intel_has_reset_engine(arg->engine->gt))
+ return 0;
+
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
@@ -2014,14 +2077,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -2109,7 +2167,7 @@ static int live_suppress_self_preempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0; /* presume black box */
if (intel_vgpu_active(gt->i915))
@@ -2599,7 +2657,7 @@ static int create_gang(struct intel_engine_cs *engine,
if (IS_ERR(rq))
goto err_obj;
- rq->batch = vma;
+ rq->batch = i915_vma_get(vma);
i915_request_get(rq);
i915_vma_lock(vma);
@@ -2623,6 +2681,7 @@ static int create_gang(struct intel_engine_cs *engine,
return 0;
err_rq:
+ i915_vma_put(rq->batch);
i915_request_put(rq);
err_obj:
i915_gem_object_put(obj);
@@ -2719,6 +2778,7 @@ static int live_preempt_gang(void *arg)
err = -ETIME;
}
+ i915_vma_put(rq->batch);
i915_request_put(rq);
rq = n;
}
@@ -2732,6 +2792,331 @@ static int live_preempt_gang(void *arg)
return 0;
}
+static
struct i915_vma *
+create_gpr_user(struct intel_engine_cs *engine,
+ struct i915_vma *result,
+ unsigned int offset)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+ int i;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, result->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(vma);
+ return ERR_CAST(cs);
+ }
+
+ /* All GPR are clear for new contexts. We use GPR(0) as a constant */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, 0);
+ *cs++ = 1;
+
+ for (i = 1; i < NUM_GPR; i++) {
+ u64 addr;
+
+ /*
+ * Perform: GPR[i]++
+ *
+ * As we read and write into the context saved GPR[i], if
+ * we restart this batch buffer from an earlier point, we
+ * will repeat the increment and store a value > 1.
+ */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
+
+ addr = result->node.start + offset + i * sizeof(*cs);
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = CS_GPR(engine, 2 * i);
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = i;
+ *cs++ = lower_32_bits(result->node.start);
+ *cs++ = upper_32_bits(result->node.start);
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, sz);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_ggtt_pin(vma, 0, 0);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static struct i915_request *
+create_gpr_client(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ unsigned int offset)
+{
+ struct i915_vma *batch, *vma;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ vma = i915_vma_instance(global->obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_ce;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_ce;
+
+ batch = create_gpr_user(engine, vma, offset);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_vma;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+
+ i915_vma_lock(batch);
+ if (!err)
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(batch);
+ i915_vma_unpin(batch);
+
+ if (!err)
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+out_batch:
+ i915_vma_put(batch);
+out_vma: + i915_vma_unpin(vma); +out_ce: + intel_context_put(ce); + return err ? ERR_PTR(err) : rq; +} + +static int preempt_user(struct intel_engine_cs *engine, + struct i915_vma *global, + int id) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_MAX + }; + struct i915_request *rq; + int err = 0; + u32 *cs; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(global); + *cs++ = 0; + *cs++ = id; + + intel_ring_advance(rq, cs); + + i915_request_get(rq); + i915_request_add(rq); + + engine->schedule(rq, &attr); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int live_preempt_user(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *global; + enum intel_engine_id id; + u32 *result; + int err = 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + /* + * In our other tests, we look at preemption in carefully + * controlled conditions in the ringbuffer. Since most of the + * time is spent in user batches, most of our preemptions naturally + * occur there. We want to verify that when we preempt inside a batch + * we continue on from the current instruction and do not roll back + * to the start, or another earlier arbitration point. + * + * To verify this, we create a batch which is a mixture of + * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with + * a few preempting contexts thrown into the mix, we look for any + * repeated instructions (which show up as incorrect values). + */ + + global = create_global(gt, 4096); + if (IS_ERR(global)) + return PTR_ERR(global); + + result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); + if (IS_ERR(result)) { + i915_vma_unpin_and_release(&global, 0); + return PTR_ERR(result); + } + + for_each_engine(engine, gt, id) { + struct i915_request *client[3] = {}; + struct igt_live_test t; + int i; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) + continue; /* we need per-context GPR */ + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + memset(result, 0, 4096); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_request *rq; + + rq = create_gpr_client(engine, global, + NUM_GPR * i * sizeof(u32)); + if (IS_ERR(rq)) + goto end_test; + + client[i] = rq; + } + + /* Continuously preempt the set of 3 running contexts */ + for (i = 1; i <= NUM_GPR; i++) { + err = preempt_user(engine, global, i); + if (err) + goto end_test; + } + + if (READ_ONCE(result[0]) != NUM_GPR) { + pr_err("%s: Failed to release semaphore\n", + engine->name); + err = -EIO; + goto end_test; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + int gpr; + + if (i915_request_wait(client[i], 0, HZ / 2) < 0) { + err = -ETIME; + goto end_test; + } + + for (gpr = 1; gpr < NUM_GPR; gpr++) { + if (result[NUM_GPR * i + gpr] != 1) { + pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", + engine->name, + i, gpr, result[NUM_GPR * i + gpr]); + err = -EINVAL; + goto end_test; + } + } + } + +end_test: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i]) + break; + + i915_request_put(client[i]); + } + + /* Flush the semaphores on error */ + smp_store_mb(result[0], -1); + if (igt_live_test_end(&t)) + err 
= -EIO; + if (err) + break; + } + + i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); + return err; +} + static int live_preempt_timeout(void *arg) { struct intel_gt *gt = arg; @@ -3224,7 +3609,7 @@ static int live_virtual_engine(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for_each_engine(engine, gt, id) { @@ -3357,7 +3742,7 @@ static int live_virtual_mask(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -3499,7 +3884,7 @@ static int live_virtual_preserved(void *arg) * are preserved. */ - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; /* As we use CS_GPR we cannot run before they existed on all engines. */ @@ -3729,7 +4114,7 @@ static int live_virtual_bond(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -3890,7 +4275,7 @@ static int live_virtual_reset(void *arg) * forgotten. */ - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; if (!intel_has_reset_engine(gt)) @@ -3939,6 +4324,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_chain_preempt), SUBTEST(live_preempt_gang), SUBTEST(live_preempt_timeout), + SUBTEST(live_preempt_user), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), @@ -3956,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } -static void hexdump(const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - pr_info("*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - pr_info("[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static int emit_semaphore_signal(struct intel_context *ce, void *slot) { const u32 offset = @@ -4015,6 +4372,32 @@ static int emit_semaphore_signal(struct intel_context *ce, void *slot) return 0; } +static int context_flush(struct intel_context *ce, long timeout) +{ + struct i915_request *rq; + struct dma_fence *fence; + int err = 0; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) + err = -ETIME; + i915_request_put(rq); + + rmb(); /* We know the request is written, make sure all state is too! 
*/ + return err; +} + static int live_lrc_layout(void *arg) { struct intel_gt *gt = arg; @@ -4040,13 +4423,12 @@ static int live_lrc_layout(void *arg) if (!engine->default_state) continue; - hw = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + hw = shmem_pin_map(engine->default_state); if (IS_ERR(hw)) { err = PTR_ERR(hw); break; } - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), engine->kernel_context, @@ -4107,13 +4489,13 @@ static int live_lrc_layout(void *arg) if (err) { pr_info("%s: HW register image:\n", engine->name); - hexdump(hw, PAGE_SIZE); + igt_hexdump(hw, PAGE_SIZE); pr_info("%s: SW register image:\n", engine->name); - hexdump(lrc, PAGE_SIZE); + igt_hexdump(lrc, PAGE_SIZE); } - i915_gem_object_unpin_map(engine->default_state); + shmem_unpin_map(engine->default_state, hw); if (err) break; } @@ -4182,10 +4564,35 @@ static int live_lrc_fixed(void *arg) "BB_STATE" }, { + i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), + lrc_ring_wa_bb_per_ctx(engine), + "RING_BB_PER_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), + lrc_ring_indirect_ptr(engine), + "RING_INDIRECT_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), + lrc_ring_indirect_offset(engine), + "RING_INDIRECT_CTX_OFFSET" + }, + { i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), CTX_TIMESTAMP - 1, "RING_CTX_TIMESTAMP" }, + { + i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), + lrc_ring_gpr0(engine), + "RING_CS_GPR0" + }, + { + i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), + lrc_ring_cmd_buf_cctl(engine), + "RING_CMD_BUF_CCTL" + }, { }, }, *t; u32 *hw; @@ -4193,13 +4600,12 @@ static int live_lrc_fixed(void *arg) if (!engine->default_state) continue; - hw = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); + hw = shmem_pin_map(engine->default_state); if (IS_ERR(hw)) { err = PTR_ERR(hw); break; } - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + hw += LRC_STATE_OFFSET / sizeof(*hw); for (t = tbl; t->name; t++) { int dw = find_offset(hw, t->reg); @@ -4215,7 +4621,7 @@ static int live_lrc_fixed(void *arg) } } - i915_gem_object_unpin_map(engine->default_state); + shmem_unpin_map(engine->default_state, hw); } return err; @@ -4638,18 +5044,10 @@ static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) wmb(); } - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - err = -ETIME; - goto err; - } - - /* and wait for switch to kernel */ - if (igt_flush_test(arg->engine->i915)) { - err = -EIO; + /* And wait for switch to kernel (to save our context to memory) */ + err = context_flush(arg->ce[0], HZ / 2); + if (err) goto err; - } - - rmb(); if (!timestamp_advanced(arg->poison, slot[1])) { pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", @@ -4674,9 +5072,9 @@ err: static int live_lrc_timestamp(void *arg) { + struct lrc_timestamp data = {}; struct intel_gt *gt = arg; enum intel_engine_id id; - struct lrc_timestamp data; const u32 poison[] = { 0, S32_MAX, @@ -4748,6 +5146,860 @@ err: return 0; } +static struct i915_vma * +create_user_vma(struct i915_address_space *vm, unsigned long size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return 
vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static struct i915_vma * +store_context(struct intel_context *ce, struct i915_vma *scratch) +{ + struct i915_vma *batch; + u32 dw, x, *cs, *hw; + u32 *defaults; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + + x = 0; + dw = 0; + hw = defaults; + hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = hw[dw]; + *cs++ = lower_32_bits(scratch->node.start + x); + *cs++ = upper_32_bits(scratch->node.start + x); + + dw += 2; + x += 4; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + shmem_unpin_map(ce->engine->default_state, defaults); + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int move_to_active(struct i915_request *rq, + struct i915_vma *vma, + unsigned int flags) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, flags); + if (!err) + err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); + + return err; +} + +static struct i915_request * +record_registers(struct intel_context *ce, + struct i915_vma *before, + struct i915_vma *after, + u32 *sema) +{ + struct i915_vma *b_before, *b_after; + struct i915_request *rq; + u32 *cs; + int err; + + b_before = store_context(ce, before); + if (IS_ERR(b_before)) + return ERR_CAST(b_before); + + b_after = store_context(ce, after); + if (IS_ERR(b_after)) { + rq = ERR_CAST(b_after); + goto err_before; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_after; + + err = move_to_active(rq, before, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_before, 0); + if (err) + goto err_rq; + + err = move_to_active(rq, after, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_after, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 14); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_before->node.start); + *cs++ = upper_32_bits(b_before->node.start); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = MI_NOOP; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_after->node.start); + *cs++ = upper_32_bits(b_after->node.start); + + intel_ring_advance(rq, cs); + + WRITE_ONCE(*sema, 0); + i915_request_get(rq); + i915_request_add(rq); +err_after: + i915_vma_put(b_after); +err_before: + i915_vma_put(b_before); + return rq; + +err_rq: + i915_request_add(rq); + rq 
= ERR_PTR(err); + goto err_after; +} + +static struct i915_vma *load_context(struct intel_context *ce, u32 poison) +{ + struct i915_vma *batch; + u32 dw, *cs, *hw; + u32 *defaults; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + + dw = 0; + hw = defaults; + hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + *cs++ = MI_LOAD_REGISTER_IMM(len); + while (len--) { + *cs++ = hw[dw]; + *cs++ = poison; + dw += 2; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + shmem_unpin_map(ce->engine->default_state, defaults); + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) +{ + struct i915_request *rq; + struct i915_vma *batch; + u32 *cs; + int err; + + batch = load_context(ce, poison); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = move_to_active(rq, batch, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(batch->node.start); + *cs++ = upper_32_bits(batch->node.start); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; +err_rq: + i915_request_add(rq); +err_batch: + i915_vma_put(batch); + return err; +} + +static bool is_moving(u32 a, u32 b) +{ + return a != b; +} + +static int compare_isolation(struct intel_engine_cs *engine, + struct i915_vma *ref[2], + struct i915_vma *result[2], + struct intel_context *ce, + u32 poison) +{ + u32 x, dw, *hw, *lrc; + u32 *A[2], *B[2]; + u32 *defaults; + int err = 0; + + A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); + if (IS_ERR(A[0])) + return PTR_ERR(A[0]); + + A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); + if (IS_ERR(A[1])) { + err = PTR_ERR(A[1]); + goto err_A0; + } + + B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); + if (IS_ERR(B[0])) { + err = PTR_ERR(B[0]); + goto err_A1; + } + + B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); + if (IS_ERR(B[1])) { + err = PTR_ERR(B[1]); + goto err_B0; + } + + lrc = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915)); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_B1; + } + lrc += LRC_STATE_OFFSET / sizeof(*hw); + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + err = -ENOMEM; + goto err_lrc; + } + + x = 0; + dw = 0; + hw = defaults; + hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != 
MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + if (!is_moving(A[0][x], A[1][x]) && + (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { + switch (hw[dw] & 4095) { + case 0x30: /* RING_HEAD */ + case 0x34: /* RING_TAIL */ + break; + + default: + pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", + engine->name, dw, + hw[dw], hw[dw + 1], + A[0][x], B[0][x], B[1][x], + poison, lrc[dw + 1]); + err = -EINVAL; + } + } + dw += 2; + x++; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + shmem_unpin_map(ce->engine->default_state, defaults); +err_lrc: + i915_gem_object_unpin_map(ce->state->obj); +err_B1: + i915_gem_object_unpin_map(result[1]->obj); +err_B0: + i915_gem_object_unpin_map(result[0]->obj); +err_A1: + i915_gem_object_unpin_map(ref[1]->obj); +err_A0: + i915_gem_object_unpin_map(ref[0]->obj); + return err; +} + +static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) +{ + u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); + struct i915_vma *ref[2], *result[2]; + struct intel_context *A, *B; + struct i915_request *rq; + int err; + + A = intel_context_create(engine); + if (IS_ERR(A)) + return PTR_ERR(A); + + B = intel_context_create(engine); + if (IS_ERR(B)) { + err = PTR_ERR(B); + goto err_A; + } + + ref[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[0])) { + err = PTR_ERR(ref[0]); + goto err_B; + } + + ref[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[1])) { + err = PTR_ERR(ref[1]); + goto err_ref0; + } + + rq = record_registers(A, ref[0], ref[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ref1; + } + + WRITE_ONCE(*sema, 1); + wmb(); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_ref1; + } + i915_request_put(rq); + + result[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[0])) { + err = PTR_ERR(result[0]); + goto err_ref1; + } + + result[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[1])) { + err = PTR_ERR(result[1]); + goto err_result0; + } + + rq = record_registers(A, result[0], result[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_result1; + } + + err = poison_registers(B, poison, sema); + if (err) { + WRITE_ONCE(*sema, -1); + i915_request_put(rq); + goto err_result1; + } + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_result1; + } + i915_request_put(rq); + + err = compare_isolation(engine, ref, result, A, poison); + +err_result1: + i915_vma_put(result[1]); +err_result0: + i915_vma_put(result[0]); +err_ref1: + i915_vma_put(ref[1]); +err_ref0: + i915_vma_put(ref[0]); +err_B: + intel_context_put(B); +err_A: + intel_context_put(A); + return err; +} + +static bool skip_isolation(const struct intel_engine_cs *engine) +{ + if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) + return true; + + if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) + return true; + + return false; +} + +static int live_lrc_isolation(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const u32 poison[] = { + STACK_MAGIC, + 0x3a3a3a3a, + 0x5c5c5c5c, + 0xffffffff, + 0xffff0000, + }; + int err = 0; + + /* + * Our goal is try and verify that per-context state cannot be + * tampered with by another non-privileged client. 
+ * + * We take the list of context registers from the LRI in the default + * context image and attempt to modify that list from a remote context. + */ + + for_each_engine(engine, gt, id) { + int i; + + /* Just don't even ask */ + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && + skip_isolation(engine)) + continue; + + intel_engine_pm_get(engine); + for (i = 0; i < ARRAY_SIZE(poison); i++) { + int result; + + result = __lrc_isolation(engine, poison[i]); + if (result && !err) + err = result; + + result = __lrc_isolation(engine, ~poison[i]); + if (result && !err) + err = result; + } + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + return err; +} + +static int indirect_ctx_submit_req(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + +#define CTX_BB_CANARY_OFFSET (3 * 1024) +#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) + +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_START(0)); + *cs++ = i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce) + + CTX_BB_CANARY_OFFSET; + *cs++ = 0; + + return cs; +} + +static void +indirect_ctx_bb_setup(struct intel_context *ce) +{ + u32 *cs = context_indirect_bb(ce); + + cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; + + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); +} + +static bool check_ring_start(struct intel_context *ce) +{ + const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - + LRC_STATE_OFFSET + context_wa_bb_offset(ce); + + if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) + return true; + + pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", + ctx_bb[CTX_BB_CANARY_INDEX], + ce->lrc_reg_state[CTX_RING_START]); + + return false; +} + +static int indirect_ctx_bb_check(struct intel_context *ce) +{ + int err; + + err = indirect_ctx_submit_req(ce); + if (err) + return err; + + if (!check_ring_start(ce)) + return -EINVAL; + + return 0; +} + +static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +{ + struct intel_context *a, *b; + int err; + + a = intel_context_create(engine); + if (IS_ERR(a)) + return PTR_ERR(a); + err = intel_context_pin(a); + if (err) + goto put_a; + + b = intel_context_create(engine); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto unpin_a; + } + err = intel_context_pin(b); + if (err) + goto put_b; + + /* We use the already reserved extra page in context state */ + if (!a->wa_bb_page) { + GEM_BUG_ON(b->wa_bb_page); + GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); + goto unpin_b; + } + + /* + * In order to test that our per context bb is truly per context, + * and executes at the intended spot on context restoring process, + * make the batch store the ring start value to memory. + * As ring start is restored apriori of starting the indirect ctx bb and + * as it will be different for each context, it fits to this purpose. 
+ */ + indirect_ctx_bb_setup(a); + indirect_ctx_bb_setup(b); + + err = indirect_ctx_bb_check(a); + if (err) + goto unpin_b; + + err = indirect_ctx_bb_check(b); + +unpin_b: + intel_context_unpin(b); +put_b: + intel_context_put(b); +unpin_a: + intel_context_unpin(a); +put_a: + intel_context_put(a); + + return err; +} + +static int live_lrc_indirect_ctx_bb(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_lrc_indirect_ctx_bb(engine); + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + if (err) + break; + } + + return err; +} + +static void garbage_reset(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (test_and_set_bit(bit, lock)) + return; + + tasklet_disable(&engine->execlists.tasklet); + + if (!rq->fence.error) + intel_engine_reset(engine, NULL); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static struct i915_request *garbage(struct intel_context *ce, + struct rnd_state *prng) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(ce); + if (err) + return ERR_PTR(err); + + prandom_bytes_state(prng, + ce->lrc_reg_state, + ce->engine->context_size - + LRC_STATE_OFFSET); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + i915_request_get(rq); + i915_request_add(rq); + return rq; + +err_unpin: + intel_context_unpin(ce); + return ERR_PTR(err); +} + +static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) +{ + struct intel_context *ce; + struct i915_request *hang; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + hang = garbage(ce, prng); + if (IS_ERR(hang)) { + err = PTR_ERR(hang); + goto err_ce; + } + + if (wait_for_submit(engine, hang, HZ / 2)) { + i915_request_put(hang); + err = -ETIME; + goto err_ce; + } + + intel_context_set_banned(ce); + garbage_reset(engine, hang); + + intel_engine_flush_submission(engine); + if (!hang->fence.error) { + i915_request_put(hang); + pr_err("%s: corrupted context was not reset\n", + engine->name); + err = -EINVAL; + goto err_ce; + } + + if (i915_request_wait(hang, 0, HZ / 2) < 0) { + pr_err("%s: corrupted context did not recover\n", + engine->name); + i915_request_put(hang); + err = -EIO; + goto err_ce; + } + i915_request_put(hang); + +err_ce: + intel_context_put(ce); + return err; +} + +static int live_lrc_garbage(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Verify that we can recover if one context state is completely + * corrupted. 
+ */ + + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) + return 0; + + for_each_engine(engine, gt, id) { + I915_RND_STATE(prng); + int err = 0, i; + + if (!intel_has_reset_engine(engine->gt)) + continue; + + intel_engine_pm_get(engine); + for (i = 0; i < 3; i++) { + err = __lrc_garbage(engine, &prng); + if (err) + break; + } + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + static int __live_pphwsp_runtime(struct intel_engine_cs *engine) { struct intel_context *ce; @@ -4845,8 +6097,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), SUBTEST(live_lrc_gpr), + SUBTEST(live_lrc_isolation), SUBTEST(live_lrc_timestamp), + SUBTEST(live_lrc_garbage), SUBTEST(live_pphwsp_runtime), + SUBTEST(live_lrc_indirect_ctx_bb), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 5f7e2dcf5686..2dc460624bbc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -11,12 +11,30 @@ #include "selftest_rc6.h" #include "selftests/i915_random.h" +#include "selftests/librapl.h" + +static u64 rc6_residency(struct intel_rc6 *rc6) +{ + u64 result; + + /* XXX VLV_GT_MEDIA_RC6? */ + + result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if (HAS_RC6p(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp); + + return result; +} int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; struct intel_rc6 *rc6 = >->rc6; + u64 rc0_power, rc6_power; intel_wakeref_t wakeref; + ktime_t dt; u64 res[2]; int err = 0; @@ -38,9 +56,14 @@ int live_rc6_manual(void *arg) __intel_rc6_disable(rc6); msleep(1); /* wakeup is not immediate, takes about 100us on icl */ - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); + + dt = ktime_get(); + rc0_power = librapl_energy_uJ(); msleep(250); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + rc0_power = librapl_energy_uJ() - rc0_power; + dt = ktime_sub(ktime_get(), dt); + res[1] = rc6_residency(rc6); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", (res[1] - res[0]) >> 10); @@ -48,13 +71,24 @@ int live_rc6_manual(void *arg) goto out_unlock; } + rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt)); + if (!rc0_power) { + pr_err("No power measured while in RC0\n"); + err = -EINVAL; + goto out_unlock; + } + /* Manually enter RC6 */ intel_rc6_park(rc6); - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); + intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); + dt = ktime_get(); + rc6_power = librapl_energy_uJ(); msleep(100); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); - + rc6_power = librapl_energy_uJ() - rc6_power; + dt = ktime_sub(ktime_get(), dt); + res[1] = rc6_residency(rc6); if (res[1] == res[0]) { pr_err("Did not enter RC6! 
RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n", intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), @@ -63,6 +97,15 @@ int live_rc6_manual(void *arg) err = -EINVAL; } + rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, ktime_to_ns(dt)); + pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { + pr_err("GPU leaked energy while in RC6!\n"); + err = -EINVAL; + goto out_unlock; + } + /* Restore what should have been the original state! */ intel_rc6_unpark(rc6); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 6ad6aca315f6..35406ecdf0b2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -115,7 +115,7 @@ static int igt_atomic_engine_reset(void *arg) if (!intel_has_reset_engine(gt)) return 0; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; intel_gt_pm_get(gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c new file mode 100644 index 000000000000..3350e7c995bc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "intel_engine_pm.h" +#include "selftests/igt_flush_test.h" + +static struct i915_vma *create_wally(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + err = i915_vma_sync(vma); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_gem_object_put(obj); + return ERR_CAST(cs); + } + + if (INTEL_GEN(engine->i915) >= 6) { + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = 0; + } else if (INTEL_GEN(engine->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + } + *cs++ = vma->node.start + 4000; + *cs++ = STACK_MAGIC; + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + vma = ERR_CAST(vma->private); + i915_gem_object_put(obj); + } + + return vma; +} + +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int new_context_sync(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = context_sync(ce); + intel_context_put(ce); + + return err; +} + +static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result) +{ + int pass; + int err; + + for (pass = 0; pass < 2; pass++) { + WRITE_ONCE(*result, 0); + err = context_sync(engine->kernel_context); + if (err || 
READ_ONCE(*result)) { + if (!err) { + pr_err("pass[%d] wa_bb emitted for the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n", + pass); + err = -EINVAL; + } + return err; + } + } + + return 0; +} + +static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context\n"); + return -EINVAL; + } + + return 0; +} + +static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + + err = context_sync(engine->kernel_context); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context [with intervening kernel]\n"); + return -EINVAL; + } + + return 0; +} + +static int __live_ctx_switch_wa(struct intel_engine_cs *engine) +{ + struct i915_vma *bb; + u32 *result; + int err; + + bb = create_wally(engine); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC); + if (IS_ERR(result)) { + intel_context_put(bb->private); + i915_vma_unpin_and_release(&bb, 0); + return PTR_ERR(result); + } + result += 1000; + + engine->wa_ctx.vma = bb; + + err = mixed_contexts_sync(engine, result); + if (err) + goto out; + + err = double_context_sync_00(engine, result); + if (err) + goto out; + + err = kernel_context_sync_00(engine, result); + if (err) + goto out; + +out: + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP); + return err; +} + +static int live_ctx_switch_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Exercise the inter-context wa batch. + * + * Between each user context we run a wa batch, and since it may + * have implications for user visible state, we have to check that + * we do actually execute it. + * + * The trick we use is to replace the normal wa batch with a custom + * one that writes to a marker within it, and we can then look for + * that marker to confirm if the batch was run when we expect it, + * and equally important it was wasn't run when we don't! + */ + + for_each_engine(engine, gt, id) { + struct i915_vma *saved_wa; + int err; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (IS_GEN_RANGE(gt->i915, 4, 5)) + continue; /* MI_STORE_DWORD is privileged! 
*/ + + saved_wa = fetch_and_zero(&engine->wa_ctx.vma); + + intel_engine_pm_get(engine); + err = __live_ctx_switch_wa(engine); + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) + err = -EIO; + + engine->wa_ctx.vma = saved_wa; + if (err) + return err; + } + + return 0; +} + +int intel_ring_submission_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_ctx_switch_wa), + }; + + if (HAS_EXECLISTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c new file mode 100644 index 000000000000..6275d69aa9cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -0,0 +1,1331 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/pm_qos.h> +#include <linux/sort.h> + +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_gpu_commands.h" +#include "intel_gt_clock_utils.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "selftest_rps.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/librapl.h" + +/* Try to isolate the impact of cstates from determing frequency response */ +#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ + +static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine) +{ + unsigned long old; + + old = fetch_and_zero(&engine->props.heartbeat_interval_ms); + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); + + return old; +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + +static void dummy_rps_work(struct work_struct *wrk) +{ +} + +static int cmp_u64(const void *A, const void *B) +{ + const u64 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + +static struct i915_vma * +create_spin_counter(struct intel_engine_cs *engine, + struct i915_address_space *vm, + bool srm, + u32 **cancel, + u32 **counter) +{ + enum { + COUNT, + INC, + __NGPR__, + }; +#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x) + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long end; + u32 *base, *cs; + int loop, i; + int err; + + obj = i915_gem_object_create_internal(vm->i915, 64 << 10); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + end = obj->base.size / sizeof(u32) - 1; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + base = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(base)) { + i915_gem_object_put(obj); + return ERR_CAST(base); + } + cs = base; + + *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2); + for (i = 0; i < __NGPR__; i++) { + *cs++ = i915_mmio_reg_offset(CS_GPR(i)); + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4; + *cs++ = 0; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(INC)); + *cs++ = 1; + + loop = cs - base; + + /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */ + for (i = 0; i < 1024; i++) { + *cs++ = MI_MATH(4); + 
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU); + + if (srm) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT)); + *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs)); + *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs)); + } + } + + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs)); + *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs)); + GEM_BUG_ON(cs - base > end); + + i915_gem_object_flush_map(obj); + + *cancel = base + loop; + *counter = srm ? memset32(base + end, 0, 1) : NULL; + return vma; +} + +static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) +{ + u8 history[64], i; + unsigned long end; + int sleep; + + i = 0; + memset(history, freq, sizeof(history)); + sleep = 20; + + /* The PCU does not change instantly, but drifts towards the goal? */ + end = jiffies + msecs_to_jiffies(timeout_ms); + do { + u8 act; + + act = read_cagf(rps); + if (time_after(jiffies, end)) + return act; + + /* Target acquired */ + if (act == freq) + return act; + + /* Any change within the last N samples? */ + if (!memchr_inv(history, act, sizeof(history))) + return act; + + history[i] = act; + i = (i + 1) % ARRAY_SIZE(history); + + usleep_range(sleep, 2 * sleep); + sleep *= 2; + if (sleep > timeout_ms * 20) + sleep = timeout_ms * 20; + } while (1); +} + +static u8 rps_set_check(struct intel_rps *rps, u8 freq) +{ + mutex_lock(&rps->lock); + GEM_BUG_ON(!intel_rps_is_active(rps)); + intel_rps_set(rps, freq); + GEM_BUG_ON(rps->last_freq != freq); + mutex_unlock(&rps->lock); + + return wait_for_freq(rps, freq, 50); +} + +static void show_pstate_limits(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_BROXTON(i915)) { + pr_info("P_STATE_CAP[%x]: 0x%08x\n", + i915_mmio_reg_offset(BXT_RP_STATE_CAP), + intel_uncore_read(rps_to_uncore(rps), + BXT_RP_STATE_CAP)); + } else if (IS_GEN(i915, 9)) { + pr_info("P_STATE_LIMITS[%x]: 0x%08x\n", + i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS), + intel_uncore_read(rps_to_uncore(rps), + GEN9_RP_STATE_LIMITS)); + } +} + +int live_rps_clock_interval(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + intel_gt_pm_get(gt); + intel_rps_disable(>->rps); + + intel_gt_check_clock_frequency(gt); + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + u32 cycles; + u64 dt; + + if (!intel_engine_can_store_dword(engine)) + continue; + + saved_heartbeat = engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + engine_heartbeat_enable(engine, saved_heartbeat); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + 
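A small standalone sketch (not kernel code) of the sampling discipline this and the other RPS tests use below: take five samples, sort them, drop the extremes and combine the middle three as (x[1] + 2*x[2] + x[3]) / 4, then require the result to sit within the 10*a >= 8*b && 8*a <= 10*b band, i.e. roughly +/-20% of the reference. Note the comparator must compare the pointed-to values, not the pointers, for the sort to be meaningful. The helper names here are ours.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static int cmp_u64(const void *A, const void *B)
{
	const uint64_t *a = A, *b = B;

	return (*a > *b) - (*a < *b);	/* compare the sample values */
}

/* Sort five samples and weight the middle three to reject outliers. */
static uint64_t triangle_filter(uint64_t x[5])
{
	qsort(x, 5, sizeof(*x), cmp_u64);
	return (x[1] + 2 * x[2] + x[3]) / 4;
}

/* Same tolerance band the tests apply to cycles vs walltime. */
static int within_20_percent(uint64_t a, uint64_t b)
{
	return 10 * a >= 8 * b && 8 * a <= 10 * b;
}

int main(void)
{
	uint64_t samples[5] = { 990, 1010, 5000 /* outlier */, 1000, 995 };
	uint64_t v = triangle_filter(samples);

	printf("filtered: %llu, within 20%% of 1000: %s\n",
	       (unsigned long long)v,
	       within_20_percent(v, 1000) ? "yes" : "no");
	return 0;
}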
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0); + + /* Set the evaluation interval to infinity! */ + intel_uncore_write_fw(gt->uncore, + GEN6_RP_UP_EI, 0xffffffff); + intel_uncore_write_fw(gt->uncore, + GEN6_RP_UP_THRESHOLD, 0xffffffff); + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, + GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG); + + if (wait_for(intel_uncore_read_fw(gt->uncore, + GEN6_RP_CUR_UP_EI), + 10)) { + /* Just skip the test; assume lack of HW support */ + pr_notice("%s: rps evaluation interval not ticking\n", + engine->name); + err = -ENODEV; + } else { + ktime_t dt_[5]; + u32 cycles_[5]; + int i; + + for (i = 0; i < 5; i++) { + preempt_disable(); + + dt_[i] = ktime_get(); + cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + udelay(1000); + + dt_[i] = ktime_sub(ktime_get(), dt_[i]); + cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + preempt_enable(); + } + + /* Use the median of both cycle/dt; close enough */ + sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL); + cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4; + sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL); + dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4); + } + + intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + + if (err == 0) { + u64 time = intel_gt_pm_interval_to_ns(gt, cycles); + u32 expected = + intel_gt_ns_to_pm_interval(gt, dt); + + pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n", + engine->name, cycles, time, dt, expected, + gt->clock_frequency / 1000); + + if (10 * time < 8 * dt || + 8 * time > 10 * dt) { + pr_err("%s: rps clock time does not match walltime!\n", + engine->name); + err = -EINVAL; + } + + if (10 * expected < 8 * cycles || + 8 * expected > 10 * cycles) { + pr_err("%s: walltime does not match rps clock ticks!\n", + engine->name); + err = -EINVAL; + } + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + break; /* once is enough */ + } + + intel_rps_enable(>->rps); + intel_gt_pm_put(gt); + + igt_spinner_fini(&spin); + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (err == -ENODEV) /* skipped, don't report a fail */ + err = 0; + + return err; +} + +int live_rps_control(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * Check that the actual frequency matches our requested frequency, + * to verify our control mechanism. We have to be careful that the + * PCU may throttle the GPU in which case the actual frequency used + * will be lowered than requested. 
+ */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + ktime_t min_dt, max_dt; + int f, limit; + int min, max; + + if (!intel_engine_can_store_dword(engine)) + continue; + + saved_heartbeat = engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { + pr_err("%s: could not set minimum frequency [%x], only %x!\n", + engine->name, rps->min_freq, read_cagf(rps)); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + show_pstate_limits(rps); + err = -EINVAL; + break; + } + + for (f = rps->min_freq + 1; f < rps->max_freq; f++) { + if (rps_set_check(rps, f) < f) + break; + } + + limit = rps_set_check(rps, f); + + if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { + pr_err("%s: could not restore minimum frequency [%x], only %x!\n", + engine->name, rps->min_freq, read_cagf(rps)); + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + show_pstate_limits(rps); + err = -EINVAL; + break; + } + + max_dt = ktime_get(); + max = rps_set_check(rps, limit); + max_dt = ktime_sub(ktime_get(), max_dt); + + min_dt = ktime_get(); + min = rps_set_check(rps, rps->min_freq); + min_dt = ktime_sub(ktime_get(), min_dt); + + igt_spinner_end(&spin); + engine_heartbeat_enable(engine, saved_heartbeat); + + pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", + engine->name, + rps->min_freq, intel_gpu_freq(rps, rps->min_freq), + rps->max_freq, intel_gpu_freq(rps, rps->max_freq), + limit, intel_gpu_freq(rps, limit), + min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); + + if (limit == rps->min_freq) { + pr_err("%s: GPU throttled to minimum!\n", + engine->name); + show_pstate_limits(rps); + err = -ENODEV; + break; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + intel_gt_pm_put(gt); + + igt_spinner_fini(&spin); + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + return err; +} + +static void show_pcu_config(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; + int gpu_freq; + + if (!HAS_LLC(i915)) + return; + + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; + } + + wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm); + + pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing"); + for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { + int ia_freq = gpu_freq; + + sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq, NULL); + + pr_info("%5d %5d %5d\n", + gpu_freq * 50, + ((ia_freq >> 0) & 0xff) * 100, + ((ia_freq >> 8) & 0xff) * 
100); + } + + intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref); +} + +static u64 __measure_frequency(u32 *cntr, int duration_ms) +{ + u64 dc, dt; + + dt = ktime_get(); + dc = READ_ONCE(*cntr); + usleep_range(1000 * duration_ms, 2000 * duration_ms); + dc = READ_ONCE(*cntr) - dc; + dt = ktime_get() - dt; + + return div64_u64(1000 * 1000 * dc, dt); +} + +static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq) +{ + u64 x[5]; + int i; + + *freq = rps_set_check(rps, *freq); + for (i = 0; i < 5; i++) + x[i] = __measure_frequency(cntr, 2); + *freq = (*freq + read_cagf(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + +static u64 __measure_cs_frequency(struct intel_engine_cs *engine, + int duration_ms) +{ + u64 dc, dt; + + dt = ktime_get(); + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)); + usleep_range(1000 * duration_ms, 2000 * duration_ms); + dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc; + dt = ktime_get() - dt; + + return div64_u64(1000 * 1000 * dc, dt); +} + +static u64 measure_cs_frequency_at(struct intel_rps *rps, + struct intel_engine_cs *engine, + int *freq) +{ + u64 x[5]; + int i; + + *freq = rps_set_check(rps, *freq); + for (i = 0; i < 5; i++) + x[i] = __measure_cs_frequency(engine, 2); + *freq = (*freq + read_cagf(rps)) / 2; + + /* A simple triangle filter for better result stability */ + sort(x, 5, sizeof(*x), cmp_u64, NULL); + return div_u64(x[1] + 2 * x[2] + x[3], 4); +} + +static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d) +{ + return f_d * x > f_n * y && f_n * x < f_d * y; +} + +int live_rps_frequency_cs(void *arg) +{ + void (*saved_work)(struct work_struct *wrk); + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + struct pm_qos_request qos; + enum intel_engine_id id; + int err = 0; + + /* + * The premise is that the GPU does change freqency at our behest. + * Let's check there is a correspondence between the requested + * frequency, the actual frequency, and the observed clock rate. 
+ */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + return 0; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_add_request(&qos, CPU_LATENCY); + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cancel, *cntr; + struct { + u64 count; + int freq; + } min, max; + + saved_heartbeat = engine_heartbeat_disable(engine); + + vma = create_spin_counter(engine, + engine->kernel_context->vm, false, + &cancel, &cntr); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + engine_heartbeat_enable(engine, saved_heartbeat); + break; + } + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_vma; + + if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), + 10)) { + pr_err("%s: timed loop did not start\n", + engine->name); + goto err_vma; + } + + min.freq = rps->min_freq; + min.count = measure_cs_frequency_at(rps, engine, &min.freq); + + max.freq = rps->max_freq; + max.count = measure_cs_frequency_at(rps, engine, &max.freq); + + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", + engine->name, + min.count, intel_gpu_freq(rps, min.freq), + max.count, intel_gpu_freq(rps, max.freq), + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, + max.freq * min.count)); + + if (!scaled_within(max.freq * min.count, + min.freq * max.count, + 2, 3)) { + int f; + + pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", + engine->name, + max.freq * min.count, + min.freq * max.count); + show_pcu_config(rps); + + for (f = min.freq + 1; f <= rps->max_freq; f++) { + int act = f; + u64 count; + + count = measure_cs_frequency_at(rps, engine, &act); + if (act < f) + break; + + pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", + engine->name, + act, intel_gpu_freq(rps, act), count, + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, + act * min.count)); + + f = act; /* may skip ahead [pcu granularity] */ + } + + err = -EINVAL; + } + +err_vma: + *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + i915_vma_unpin(vma); + i915_vma_put(vma); + + engine_heartbeat_enable(engine, saved_heartbeat); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_remove_request(&qos); + + return err; +} + +int live_rps_frequency_srm(void *arg) +{ + void (*saved_work)(struct work_struct *wrk); + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + struct pm_qos_request qos; + enum intel_engine_id id; + int err = 0; + + /* + * The premise is that the GPU does change freqency at our behest. + * Let's check there is a correspondence between the requested + * frequency, the actual frequency, and the observed clock rate. 
+ */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ + return 0; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_add_request(&qos, CPU_LATENCY); + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + unsigned long saved_heartbeat; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cancel, *cntr; + struct { + u64 count; + int freq; + } min, max; + + saved_heartbeat = engine_heartbeat_disable(engine); + + vma = create_spin_counter(engine, + engine->kernel_context->vm, true, + &cancel, &cntr); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + engine_heartbeat_enable(engine, saved_heartbeat); + break; + } + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_vma; + + if (wait_for(READ_ONCE(*cntr), 10)) { + pr_err("%s: timed loop did not start\n", + engine->name); + goto err_vma; + } + + min.freq = rps->min_freq; + min.count = measure_frequency_at(rps, cntr, &min.freq); + + max.freq = rps->max_freq; + max.count = measure_frequency_at(rps, cntr, &max.freq); + + pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", + engine->name, + min.count, intel_gpu_freq(rps, min.freq), + max.count, intel_gpu_freq(rps, max.freq), + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, + max.freq * min.count)); + + if (!scaled_within(max.freq * min.count, + min.freq * max.count, + 1, 2)) { + int f; + + pr_err("%s: CS did not scale with frequency! 
scaled min:%llu, max:%llu\n", + engine->name, + max.freq * min.count, + min.freq * max.count); + show_pcu_config(rps); + + for (f = min.freq + 1; f <= rps->max_freq; f++) { + int act = f; + u64 count; + + count = measure_frequency_at(rps, cntr, &act); + if (act < f) + break; + + pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", + engine->name, + act, intel_gpu_freq(rps, act), count, + (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, + act * min.count)); + + f = act; /* may skip ahead [pcu granularity] */ + } + + err = -EINVAL; + } + +err_vma: + *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + i915_vma_unpin(vma); + i915_vma_put(vma); + + engine_heartbeat_enable(engine, saved_heartbeat); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + intel_gt_pm_wait_for_idle(gt); + rps->work.func = saved_work; + + if (CPU_LATENCY >= 0) + cpu_latency_qos_remove_request(&qos); + + return err; +} + +static void sleep_for_ei(struct intel_rps *rps, int timeout_us) +{ + /* Flush any previous EI */ + usleep_range(timeout_us, 2 * timeout_us); + + /* Reset the interrupt status */ + rps_disable_interrupts(rps); + GEM_BUG_ON(rps->pm_iir); + rps_enable_interrupts(rps); + + /* And then wait for the timeout, for real this time */ + usleep_range(2 * timeout_us, 3 * timeout_us); +} + +static int __rps_up_interrupt(struct intel_rps *rps, + struct intel_engine_cs *engine, + struct igt_spinner *spin) +{ + struct intel_uncore *uncore = engine->uncore; + struct i915_request *rq; + u32 timeout; + + if (!intel_engine_can_store_dword(engine)) + return 0; + + rps_set_check(rps, rps->min_freq); + + rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + pr_err("%s: RPS spinner did not start\n", + engine->name); + i915_request_put(rq); + intel_gt_set_wedged(engine->gt); + return -EIO; + } + + if (!intel_rps_is_active(rps)) { + pr_err("%s: RPS not enabled on starting spinner\n", + engine->name); + igt_spinner_end(spin); + i915_request_put(rq); + return -EINVAL; + } + + if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) { + pr_err("%s: RPS did not register UP interrupt\n", + engine->name); + i915_request_put(rq); + return -EINVAL; + } + + if (rps->last_freq != rps->min_freq) { + pr_err("%s: RPS did not program min frequency\n", + engine->name); + i915_request_put(rq); + return -EINVAL; + } + + timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI); + timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); + timeout = DIV_ROUND_UP(timeout, 1000); + + sleep_for_ei(rps, timeout); + GEM_BUG_ON(i915_request_completed(rq)); + + igt_spinner_end(spin); + i915_request_put(rq); + + if (rps->cur_freq != rps->min_freq) { + pr_err("%s: Frequency unexpectedly changed [up], now %d!\n", + engine->name, intel_rps_read_actual_frequency(rps)); + return -EINVAL; + } + + if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) { + pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n", + engine->name, rps->pm_iir, + intel_uncore_read(uncore, GEN6_RP_PREV_UP), + intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), + intel_uncore_read(uncore, GEN6_RP_UP_EI)); + return -EINVAL; + } + + return 0; +} + +static int __rps_down_interrupt(struct intel_rps *rps, + struct intel_engine_cs *engine) +{ + struct intel_uncore *uncore = engine->uncore; + u32 timeout; + + rps_set_check(rps, rps->max_freq); + + 
if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) { + pr_err("%s: RPS did not register DOWN interrupt\n", + engine->name); + return -EINVAL; + } + + if (rps->last_freq != rps->max_freq) { + pr_err("%s: RPS did not program max frequency\n", + engine->name); + return -EINVAL; + } + + timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); + timeout = DIV_ROUND_UP(timeout, 1000); + + sleep_for_ei(rps, timeout); + + if (rps->cur_freq != rps->max_freq) { + pr_err("%s: Frequency unexpectedly changed [down], now %d!\n", + engine->name, + intel_rps_read_actual_frequency(rps)); + return -EINVAL; + } + + if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) { + pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n", + engine->name, rps->pm_iir, + intel_uncore_read(uncore, GEN6_RP_PREV_DOWN), + intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD), + intel_uncore_read(uncore, GEN6_RP_DOWN_EI), + intel_uncore_read(uncore, GEN6_RP_PREV_UP), + intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), + intel_uncore_read(uncore, GEN6_RP_UP_EI)); + return -EINVAL; + } + + return 0; +} + +int live_rps_interrupt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_rps *rps = >->rps; + void (*saved_work)(struct work_struct *wrk); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + u32 pm_events; + int err = 0; + + /* + * First, let's check whether or not we are receiving interrupts. + */ + + if (!intel_rps_has_interrupts(rps)) + return 0; + + intel_gt_pm_get(gt); + pm_events = rps->pm_events; + intel_gt_pm_put(gt); + if (!pm_events) { + pr_err("No RPS PM events registered, but RPS is enabled?\n"); + return -ENODEV; + } + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + intel_gt_pm_wait_for_idle(gt); + saved_work = rps->work.func; + rps->work.func = dummy_rps_work; + + for_each_engine(engine, gt, id) { + /* Keep the engine busy with a spinner; expect an UP! 
*/
+ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ err = __rps_up_interrupt(rps, engine, &spin);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ }
+
+ /* Keep the engine awake but idle and check for DOWN */
+ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+ intel_rc6_disable(&gt->rc6);
+
+ err = __rps_down_interrupt(rps, engine);
+
+ intel_rc6_enable(&gt->rc6);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+static u64 __measure_power(int duration_ms)
+{
+ u64 dE, dt;
+
+ dt = ktime_get();
+ dE = librapl_energy_uJ();
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dE = librapl_energy_uJ() - dE;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dE, dt);
+}
+
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_power(5);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+int live_rps_power(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Our fundamental assumption is that running at lower frequency
+ * actually saves power. Let's see if our RAPL measurement supports
+ * that theory.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (!librapl_energy_uJ())
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct {
+ u64 power;
+ int freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ max.freq = rps->max_freq;
+ max.power = measure_power_at(rps, &max.freq);
+
+ min.freq = rps->min_freq;
+ min.power = measure_power_at(rps, &min.freq);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
+ engine->name,
+ min.power, intel_gpu_freq(rps, min.freq),
+ max.power, intel_gpu_freq(rps, max.freq));
+
+ if (10 * min.freq >= 9 * max.freq) {
+ pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
+ min.freq, intel_gpu_freq(rps, min.freq),
+ max.freq, intel_gpu_freq(rps, max.freq));
+ continue;
+ }
+
+ if (11 * min.power > 10 * max.power) {
+ pr_err("%s: did not conserve power when setting lower frequency!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+int live_rps_dynamic(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * We've looked at the basics, and have established that we
+ * can change the clock frequency and that the HW will generate
+ * interrupts based on load. Now we check how we integrate those
+ * moving parts into dynamic reclocking based on load.
+ */ + + if (!intel_rps_is_enabled(rps)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct i915_request *rq; + struct { + ktime_t dt; + u8 freq; + } min, max; + + if (!intel_engine_can_store_dword(engine)) + continue; + + intel_gt_pm_wait_for_idle(gt); + GEM_BUG_ON(intel_rps_is_active(rps)); + rps->cur_freq = rps->min_freq; + + intel_engine_pm_get(engine); + intel_rc6_disable(>->rc6); + GEM_BUG_ON(rps->last_freq != rps->min_freq); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err; + } + + i915_request_add(rq); + + max.dt = ktime_get(); + max.freq = wait_for_freq(rps, rps->max_freq, 500); + max.dt = ktime_sub(ktime_get(), max.dt); + + igt_spinner_end(&spin); + + min.dt = ktime_get(); + min.freq = wait_for_freq(rps, rps->min_freq, 2000); + min.dt = ktime_sub(ktime_get(), min.dt); + + pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", + engine->name, + max.freq, intel_gpu_freq(rps, max.freq), + ktime_to_ns(max.dt), + min.freq, intel_gpu_freq(rps, min.freq), + ktime_to_ns(min.dt)); + if (min.freq >= max.freq) { + pr_err("%s: dynamic reclocking of spinner failed\n!", + engine->name); + err = -EINVAL; + } + +err: + intel_rc6_enable(>->rc6); + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h new file mode 100644 index 000000000000..6e82a631cfa1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_rps.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SELFTEST_RPS_H +#define SELFTEST_RPS_H + +int live_rps_control(void *arg); +int live_rps_clock_interval(void *arg); +int live_rps_frequency_cs(void *arg); +int live_rps_frequency_srm(void *arg); +int live_rps_power(void *arg); +int live_rps_interrupt(void *arg); +int live_rps_dynamic(void *arg); + +#endif /* SELFTEST_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c new file mode 100644 index 000000000000..43c7acbdc79d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/shmem_fs.h> + +#include "gem/i915_gem_object.h" +#include "shmem_utils.h" + +struct file *shmem_create_from_data(const char *name, void *data, size_t len) +{ + struct file *file; + int err; + + file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE); + if (IS_ERR(file)) + return file; + + err = shmem_write(file, 0, data, len); + if (err) { + fput(file); + return ERR_PTR(err); + } + + return file; +} + +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) +{ + struct file *file; + void *ptr; + + if (obj->ops == &i915_gem_shmem_ops) { + file = obj->base.filp; + atomic_long_inc(&file->f_count); + return file; + } + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(ptr)) + return ERR_CAST(ptr); + + file = shmem_create_from_data("", ptr, obj->base.size); + i915_gem_object_unpin_map(obj); + + return file; +} + +static size_t shmem_npte(struct file *file) +{ + return file->f_mapping->host->i_size >> PAGE_SHIFT; +} + +static void __shmem_unpin_map(struct file *file, void *ptr, size_t 
n_pte) +{ + unsigned long pfn; + + vunmap(ptr); + + for (pfn = 0; pfn < n_pte; pfn++) { + struct page *page; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (!WARN_ON(IS_ERR(page))) { + put_page(page); + put_page(page); + } + } +} + +void *shmem_pin_map(struct file *file) +{ + const size_t n_pte = shmem_npte(file); + pte_t *stack[32], **ptes, **mem; + struct vm_struct *area; + unsigned long pfn; + + mem = stack; + if (n_pte > ARRAY_SIZE(stack)) { + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); + if (!mem) + return NULL; + } + + area = alloc_vm_area(n_pte << PAGE_SHIFT, mem); + if (!area) { + if (mem != stack) + kvfree(mem); + return NULL; + } + + ptes = mem; + for (pfn = 0; pfn < n_pte; pfn++) { + struct page *page; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (IS_ERR(page)) + goto err_page; + + **ptes++ = mk_pte(page, PAGE_KERNEL); + } + + if (mem != stack) + kvfree(mem); + + mapping_set_unevictable(file->f_mapping); + return area->addr; + +err_page: + if (mem != stack) + kvfree(mem); + + __shmem_unpin_map(file, area->addr, pfn); + return NULL; +} + +void shmem_unpin_map(struct file *file, void *ptr) +{ + mapping_clear_unevictable(file->f_mapping); + __shmem_unpin_map(file, ptr, shmem_npte(file)); +} + +static int __shmem_rw(struct file *file, loff_t off, + void *ptr, size_t len, + bool write) +{ + unsigned long pfn; + + for (pfn = off >> PAGE_SHIFT; len; pfn++) { + unsigned int this = + min_t(size_t, PAGE_SIZE - offset_in_page(off), len); + struct page *page; + void *vaddr; + + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, + GFP_KERNEL); + if (IS_ERR(page)) + return PTR_ERR(page); + + vaddr = kmap(page); + if (write) + memcpy(vaddr + offset_in_page(off), ptr, this); + else + memcpy(ptr, vaddr + offset_in_page(off), this); + kunmap(page); + put_page(page); + + len -= this; + ptr += this; + off = 0; + } + + return 0; +} + +int shmem_read(struct file *file, loff_t off, void *dst, size_t len) +{ + return __shmem_rw(file, off, dst, len, false); +} + +int shmem_write(struct file *file, loff_t off, void *src, size_t len) +{ + return __shmem_rw(file, off, src, len, true); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "st_shmem_utils.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h new file mode 100644 index 000000000000..c1669170c351 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SHMEM_UTILS_H +#define SHMEM_UTILS_H + +#include <linux/types.h> + +struct drm_i915_gem_object; +struct file; + +struct file *shmem_create_from_data(const char *name, void *data, size_t len); +struct file *shmem_create_from_object(struct drm_i915_gem_object *obj); + +void *shmem_pin_map(struct file *file); +void shmem_unpin_map(struct file *file, void *ptr); + +int shmem_read(struct file *file, loff_t off, void *dst, size_t len); +int shmem_write(struct file *file, loff_t off, void *src, size_t len); + +#endif /* SHMEM_UTILS_H */ diff --git a/drivers/gpu/drm/i915/gt/st_shmem_utils.c b/drivers/gpu/drm/i915/gt/st_shmem_utils.c new file mode 100644 index 000000000000..b279fe88b70e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/st_shmem_utils.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* Just a quick and causal check of the shmem_utils API */ + +static int igt_shmem_basic(void *ignored) +{ + u32 
datum = 0xdeadbeef, result; + struct file *file; + u32 *map; + int err; + + file = shmem_create_from_data("mock", &datum, sizeof(datum)); + if (IS_ERR(file)) + return PTR_ERR(file); + + result = 0; + err = shmem_read(file, 0, &result, sizeof(result)); + if (err) + goto out_file; + + if (result != datum) { + pr_err("Incorrect read back from shmemfs: %x != %x\n", + result, datum); + err = -EINVAL; + goto out_file; + } + + result = 0xc0ffee; + err = shmem_write(file, 0, &result, sizeof(result)); + if (err) + goto out_file; + + map = shmem_pin_map(file); + if (!map) { + err = -ENOMEM; + goto out_file; + } + + if (*map != result) { + pr_err("Incorrect read back via mmap of last write: %x != %x\n", + *map, result); + err = -EINVAL; + goto out_map; + } + +out_map: + shmem_unpin_map(file, map); +out_file: + fput(file); + return err; +} + +int shmem_utils_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shmem_basic), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c new file mode 100644 index 000000000000..535cc1169e54 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -0,0 +1,539 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "intel_engine.h" +#include "intel_engine_heartbeat.h" +#include "sysfs_engines.h" + +struct kobj_engine { + struct kobject base; + struct intel_engine_cs *engine; +}; + +static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_engine, base)->engine; +} + +static ssize_t +name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name); +} + +static struct kobj_attribute name_attr = +__ATTR(name, 0444, name_show, NULL); + +static ssize_t +class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class); +} + +static struct kobj_attribute class_attr = +__ATTR(class, 0444, class_show, NULL); + +static ssize_t +inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance); +} + +static struct kobj_attribute inst_attr = +__ATTR(instance, 0444, inst_show, NULL); + +static ssize_t +mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base); +} + +static struct kobj_attribute mmio_attr = +__ATTR(mmio_base, 0444, mmio_show, NULL); + +static const char * const vcs_caps[] = { + [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc", + [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc", +}; + +static const char * const vecs_caps[] = { + [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc", +}; + +static ssize_t repr_trim(char *buf, ssize_t len) +{ + /* Trim off the trailing space and replace with a newline */ + if (len > PAGE_SIZE) + len = PAGE_SIZE; + if (len > 0) + buf[len - 1] = '\n'; + + return len; +} + +static ssize_t +__caps_show(struct intel_engine_cs *engine, + u32 caps, char *buf, bool show_unknown) +{ + const char * const *repr; + int count, n; + ssize_t len; + + BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities)); + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + repr = vcs_caps; + count = ARRAY_SIZE(vcs_caps); + break; + + 
case VIDEO_ENHANCEMENT_CLASS:
+ repr = vecs_caps;
+ count = ARRAY_SIZE(vecs_caps);
+ break;
+
+ default:
+ repr = NULL;
+ count = 0;
+ break;
+ }
+ GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+
+ len = 0;
+ for_each_set_bit(n,
+ (unsigned long *)&caps,
+ show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+ if (n >= count || !repr[n]) {
+ if (GEM_WARN_ON(show_unknown))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "[%x] ", n);
+ } else {
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "%s ", repr[n]);
+ }
+ if (GEM_WARN_ON(len >= PAGE_SIZE))
+ break;
+ }
+ return repr_trim(buf, len);
+}
+
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return __caps_show(engine, engine->uabi_capabilities, buf, true);
+}
+
+static struct kobj_attribute caps_attr =
+__ATTR(capabilities, 0444, caps_show, NULL);
+
+static ssize_t
+all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return __caps_show(kobj_to_engine(kobj), -1, buf, false);
+}
+
+static struct kobj_attribute all_caps_attr =
+__ATTR(known_capabilities, 0444, all_caps_show, NULL);
+
+static ssize_t
+max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * When waiting for a request, if it is currently being executed
+ * on the GPU, we busywait for a short while before sleeping. The
+ * premise is that most requests are short, and if it is already
+ * executing then there is a good chance that it will complete
+ * before we can set up the interrupt handler and go to sleep.
+ * We try to offset the cost of going to sleep by first spinning
+ * on the request -- if it completed in less time than it would take
+ * to go to sleep, process the interrupt and return to the client,
+ * then we have saved the client some latency, albeit at the cost
+ * of spinning on an expensive CPU core.
+ *
+ * While we try to avoid waiting at all for a request that is unlikely
+ * to complete, deciding how long it is worth spinning for is an
+ * arbitrary decision: trading off power vs latency.
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_nsecs(2))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
+
+ return count;
+}
+
+static ssize_t
+max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_attr =
+__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
+
+static ssize_t
+max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_def =
+__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
+
+static ssize_t
+timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * Execlists uses a scheduling quantum (a timeslice) to alternate
+ * execution between ready-to-run contexts of equal priority. This
+ * ensures that all users (though only if they are of equal importance)
+ * have the opportunity to run and prevents livelocks where contexts
+ * may have implicit ordering due to userspace semaphores.
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
+
+ if (execlists_active(&engine->execlists))
+ set_timer_ms(&engine->execlists.timer, duration);
+
+ return count;
+}
+
+static ssize_t
+timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
+
+static ssize_t
+timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
+
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * Allowing ourselves to sleep before a GPU reset after disabling
+ * submission, even for a few milliseconds, gives an innocent context
+ * the opportunity to clear the GPU before the reset occurs. However,
+ * how long to sleep depends on the typical non-preemptible duration
+ * (a similar problem to determining the ideal preempt-reset timeout
+ * or even the heartbeat interval).
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.stop_timeout_ms, duration);
+ return count;
+}
+
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
+static ssize_t
+stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_def =
+__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long timeout;
+ int err;
+
+ /*
+ * After initialising a preemption request, we give the current
+ * resident a small amount of time to vacate the GPU. The preemption
+ * request is for a higher priority context and should be immediate to
+ * maintain high quality of service (and avoid priority inversion).
+ * However, the preemption granularity of the GPU can be quite coarse
+ * and so we need a compromise.
+ */ + + err = kstrtoull(buf, 0, &timeout); + if (err) + return err; + + if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + WRITE_ONCE(engine->props.preempt_timeout_ms, timeout); + + if (READ_ONCE(engine->execlists.pending[0])) + set_timer_ms(&engine->execlists.preempt, timeout); + + return count; +} + +static ssize_t +preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.preempt_timeout_ms); +} + +static struct kobj_attribute preempt_timeout_attr = +__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store); + +static ssize_t +preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms); +} + +static struct kobj_attribute preempt_timeout_def = +__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL); + +static ssize_t +heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long delay; + int err; + + /* + * We monitor the health of the system via periodic heartbeat pulses. + * The pulses also provide the opportunity to perform garbage + * collection. However, we interpret an incomplete pulse (a missed + * heartbeat) as an indication that the system is no longer responsive, + * i.e. hung, and perform an engine or full GPU reset. Given that the + * preemption granularity can be very coarse on a system, the optimal + * value for any workload is unknowable! + */ + + err = kstrtoull(buf, 0, &delay); + if (err) + return err; + + if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + err = intel_engine_set_heartbeat(engine, delay); + if (err) + return err; + + return count; +} + +static ssize_t +heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.heartbeat_interval_ms); +} + +static struct kobj_attribute heartbeat_interval_attr = +__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store); + +static ssize_t +heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms); +} + +static struct kobj_attribute heartbeat_interval_def = +__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL); + +static void kobj_engine_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type kobj_engine_type = { + .release = kobj_engine_release, + .sysfs_ops = &kobj_sysfs_ops +}; + +static struct kobject * +kobj_engine(struct kobject *dir, struct intel_engine_cs *engine) +{ + struct kobj_engine *ke; + + ke = kzalloc(sizeof(*ke), GFP_KERNEL); + if (!ke) + return NULL; + + kobject_init(&ke->base, &kobj_engine_type); + ke->engine = engine; + + if (kobject_add(&ke->base, dir, "%s", engine->name)) { + kobject_put(&ke->base); + return NULL; + } + + /* xfer ownership to sysfs tree */ + return &ke->base; +} + +static void add_defaults(struct kobj_engine *parent) +{ + static const struct attribute *files[] = { + &max_spin_def.attr, + &stop_timeout_def.attr, +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL + &heartbeat_interval_def.attr, +#endif + 
NULL + }; + struct kobj_engine *ke; + + ke = kzalloc(sizeof(*ke), GFP_KERNEL); + if (!ke) + return; + + kobject_init(&ke->base, &kobj_engine_type); + ke->engine = parent->engine; + + if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) { + kobject_put(&ke->base); + return; + } + + if (sysfs_create_files(&ke->base, files)) + return; + + if (intel_engine_has_timeslices(ke->engine) && + sysfs_create_file(&ke->base, ×lice_duration_def.attr)) + return; + + if (intel_engine_has_preempt_reset(ke->engine) && + sysfs_create_file(&ke->base, &preempt_timeout_def.attr)) + return; +} + +void intel_engines_add_sysfs(struct drm_i915_private *i915) +{ + static const struct attribute *files[] = { + &name_attr.attr, + &class_attr.attr, + &inst_attr.attr, + &mmio_attr.attr, + &caps_attr.attr, + &all_caps_attr.attr, + &max_spin_attr.attr, + &stop_timeout_attr.attr, +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL + &heartbeat_interval_attr.attr, +#endif + NULL + }; + + struct device *kdev = i915->drm.primary->kdev; + struct intel_engine_cs *engine; + struct kobject *dir; + + dir = kobject_create_and_add("engine", &kdev->kobj); + if (!dir) + return; + + for_each_uabi_engine(engine, i915) { + struct kobject *kobj; + + kobj = kobj_engine(dir, engine); + if (!kobj) + goto err_engine; + + if (sysfs_create_files(kobj, files)) + goto err_object; + + if (intel_engine_has_timeslices(engine) && + sysfs_create_file(kobj, ×lice_duration_attr.attr)) + goto err_engine; + + if (intel_engine_has_preempt_reset(engine) && + sysfs_create_file(kobj, &preempt_timeout_attr.attr)) + goto err_engine; + + add_defaults(container_of(kobj, struct kobj_engine, base)); + + if (0) { +err_object: + kobject_put(kobj); +err_engine: + dev_err(kdev, "Failed to add sysfs engine '%s'\n", + engine->name); + break; + } + } +} diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.h b/drivers/gpu/drm/i915/gt/sysfs_engines.h new file mode 100644 index 000000000000..9546fffe03a7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_SYSFS_H +#define INTEL_ENGINE_SYSFS_H + +struct drm_i915_private; + +void intel_engines_add_sysfs(struct drm_i915_private *i915); + +#endif /* INTEL_ENGINE_SYSFS_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index c4c1523da7a6..861657897c0f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -169,7 +169,7 @@ void intel_guc_init_early(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - intel_guc_fw_init_early(guc); + intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC); intel_guc_ct_init_early(&guc->ct); intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); @@ -207,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) { u32 flags = 0; - if (!intel_guc_is_submission_supported(guc)) + if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; return flags; @@ -217,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc) { u32 flags = 0; - if (intel_guc_is_submission_supported(guc)) { + if (intel_guc_submission_is_used(guc)) { u32 ctxnum, base; base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); @@ -333,7 +333,7 @@ int intel_guc_init(struct intel_guc *guc) ret = intel_uc_fw_init(&guc->fw); if (ret) - goto err_fetch; + goto out; ret = intel_guc_log_create(&guc->log); if (ret) @@ -348,7 +348,7 @@ int 
intel_guc_init(struct intel_guc *guc) if (ret) goto err_ads; - if (intel_guc_is_submission_supported(guc)) { + if (intel_guc_submission_is_used(guc)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later @@ -364,6 +364,8 @@ int intel_guc_init(struct intel_guc *guc) /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(gt->ggtt); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE); + return 0; err_ct: @@ -374,9 +376,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_fw: intel_uc_fw_fini(&guc->fw); -err_fetch: - intel_uc_fw_cleanup_fetch(&guc->fw); - DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret); +out: + i915_probe_error(gt->i915, "failed with %d\n", ret); return ret; } @@ -384,12 +385,12 @@ void intel_guc_fini(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - if (!intel_uc_fw_is_available(&guc->fw)) + if (!intel_uc_fw_is_loadable(&guc->fw)) return; i915_ggtt_disable_guc(gt->ggtt); - if (intel_guc_is_submission_supported(guc)) + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); intel_guc_ct_fini(&guc->ct); @@ -397,9 +398,6 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); intel_uc_fw_fini(&guc->fw); - intel_uc_fw_cleanup_fetch(&guc->fw); - - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED); } /* @@ -544,7 +542,7 @@ int intel_guc_suspend(struct intel_guc *guc) * If GuC communication is enabled but submission is not supported, * we do not need to suspend the GuC. */ - if (!intel_guc_submission_is_enabled(guc)) + if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) return 0; /* @@ -609,7 +607,7 @@ int intel_guc_resume(struct intel_guc *guc) * we do not need to resume the GuC but we do need to enable the * GuC communication on resume (above). */ - if (!intel_guc_submission_is_enabled(guc)) + if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) return 0; return intel_guc_send(guc, action, ARRAY_SIZE(action)); @@ -725,3 +723,47 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, return 0; } + +/** + * intel_guc_load_status - dump information about GuC load status + * @guc: the GuC + * @p: the &drm_printer + * + * Pretty printer for GuC load status. 
+ */ +void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_uncore *uncore = gt->uncore; + intel_wakeref_t wakeref; + + if (!intel_guc_is_supported(guc)) { + drm_printf(p, "GuC not supported\n"); + return; + } + + if (!intel_guc_is_wanted(guc)) { + drm_printf(p, "GuC disabled\n"); + return; + } + + intel_uc_fw_dump(&guc->fw, p); + + with_intel_runtime_pm(uncore->rpm, wakeref) { + u32 status = intel_uncore_read(uncore, GUC_STATUS); + u32 i; + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + drm_printf(p, "\tuKernel status = 0x%x\n", + (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + drm_printf(p, "\tMIA Core status = 0x%x\n", + (status & GS_MIA_MASK) >> GS_MIA_SHIFT); + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < 16; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, intel_uncore_read(uncore, SOFT_SCRATCH(i))); + } + } +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 668b067b71e2..e84ab67b317d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -39,7 +39,7 @@ struct intel_guc { void (*disable)(struct intel_guc *guc); } interrupts; - bool submission_supported; + bool submission_selected; struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; @@ -74,6 +74,11 @@ struct intel_guc { struct mutex send_mutex; }; +static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) +{ + return container_of(log, struct intel_guc, log); +} + static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { @@ -143,11 +148,17 @@ static inline bool intel_guc_is_supported(struct intel_guc *guc) return intel_uc_fw_is_supported(&guc->fw); } -static inline bool intel_guc_is_enabled(struct intel_guc *guc) +static inline bool intel_guc_is_wanted(struct intel_guc *guc) { return intel_uc_fw_is_enabled(&guc->fw); } +static inline bool intel_guc_is_used(struct intel_guc *guc) +{ + GEM_BUG_ON(__intel_uc_fw_status(&guc->fw) == INTEL_UC_FIRMWARE_SELECTED); + return intel_uc_fw_is_available(&guc->fw); +} + static inline bool intel_guc_is_fw_running(struct intel_guc *guc) { return intel_uc_fw_is_running(&guc->fw); @@ -167,11 +178,6 @@ static inline int intel_guc_sanitize(struct intel_guc *guc) return 0; } -static inline bool intel_guc_is_submission_supported(struct intel_guc *guc) -{ - return guc->submission_supported; -} - static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) { spin_lock_irq(&guc->irq_lock); @@ -189,4 +195,6 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine); +void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c new file mode 100644 index 000000000000..fe7cb7b29a1e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "gt/debugfs_gt.h" +#include "intel_guc.h" +#include "intel_guc_debugfs.h" +#include "intel_guc_log_debugfs.h" + +static int guc_info_show(struct seq_file *m, void *data) +{ + struct intel_guc *guc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + if 
(!intel_guc_is_supported(guc)) + return -ENODEV; + + intel_guc_load_status(guc, &p); + drm_puts(&p, "\n"); + intel_guc_log_info(&guc->log, &p); + + /* Add more as required ... */ + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); + +void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "guc_info", &guc_info_fops, NULL }, + }; + + if (!intel_guc_is_supported(guc)) + return; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), guc); + intel_guc_log_debugfs_register(&guc->log, root); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h new file mode 100644 index 000000000000..424c26665cf1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef DEBUGFS_GUC_H +#define DEBUGFS_GUC_H + +struct intel_guc; +struct dentry; + +void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root); + +#endif /* DEBUGFS_GUC_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 3a1c47d600ea..d4a87f4c9421 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -13,20 +13,6 @@ #include "intel_guc_fw.h" #include "i915_drv.h" -/** - * intel_guc_fw_init_early() - initializes GuC firmware struct - * @guc: intel_guc struct - * - * On platforms with GuC selects firmware for uploading - */ -void intel_guc_fw_init_early(struct intel_guc *guc) -{ - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - - intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915), - INTEL_INFO(i915)->platform, INTEL_REVID(i915)); -} - static void guc_prepare_xfer(struct intel_uncore *uncore) { u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES | diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h index b5ab639d7259..0b4d2a9c9435 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h @@ -8,7 +8,6 @@ struct intel_guc; -void intel_guc_fw_init_early(struct intel_guc *guc); int intel_guc_fw_upload(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index caed0d57e704..fb10f3597ea5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -55,11 +55,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) -{ - return container_of(log, struct intel_guc, log); -} - static void guc_log_enable_flush_events(struct intel_guc_log *log) { intel_guc_enable_msg(log_to_guc(log), @@ -672,3 +667,95 @@ void intel_guc_log_handle_flush_event(struct intel_guc_log *log) { queue_work(system_highpri_wq, &log->relay.flush_work); } + +static const char * +stringify_guc_log_type(enum guc_log_buffer_type type) +{ + switch (type) { + case GUC_ISR_LOG_BUFFER: + return "ISR"; + case GUC_DPC_LOG_BUFFER: + return "DPC"; + case GUC_CRASH_DUMP_LOG_BUFFER: + return "CRASH"; + default: + MISSING_CASE(type); + } + + return ""; +} + +/** + * intel_guc_log_info - dump information about GuC log relay + * @log: the GuC log + * @p: the &drm_printer + * + * Pretty printer for GuC log info + */ +void 
intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) +{ + enum guc_log_buffer_type type; + + if (!intel_guc_log_relay_created(log)) { + drm_puts(p, "GuC log relay not created\n"); + return; + } + + drm_puts(p, "GuC logging stats:\n"); + + drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count); + + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", + stringify_guc_log_type(type), + log->stats[type].flush, + log->stats[type].sampled_overflow); + } +} + +/** + * intel_guc_log_dump - dump the contents of the GuC log + * @log: the GuC log + * @p: the &drm_printer + * @dump_load_err: dump the log saved on GuC load error + * + * Pretty printer for the GuC log + */ +int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, + bool dump_load_err) +{ + struct intel_guc *guc = log_to_guc(log); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + struct drm_i915_gem_object *obj = NULL; + u32 *map; + int i = 0; + + if (!intel_guc_is_supported(guc)) + return -ENODEV; + + if (dump_load_err) + obj = uc->load_err_log; + else if (guc->log.vma) + obj = guc->log.vma->obj; + + if (!obj) + return 0; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) { + DRM_DEBUG("Failed to pin object\n"); + drm_puts(p, "(log data unaccessible)\n"); + return PTR_ERR(map); + } + + for (i = 0; i < obj->base.size / sizeof(u32); i += 4) + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(map + i), *(map + i + 1), + *(map + i + 2), *(map + i + 3)); + + drm_puts(p, "\n"); + + i915_gem_object_unpin_map(obj); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index c252c022c5fc..11fccd0b2294 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -79,4 +79,8 @@ static inline u32 intel_guc_log_get_level(struct intel_guc_log *log) return log->level; } +void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p); +int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, + bool dump_load_err); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c new file mode 100644 index 000000000000..129e0cf7dfe2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/fs.h> +#include <drm/drm_print.h> + +#include "gt/debugfs_gt.h" +#include "intel_guc.h" +#include "intel_guc_log.h" +#include "intel_guc_log_debugfs.h" + +static int guc_log_dump_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + return intel_guc_log_dump(m->private, &p, false); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump); + +static int guc_load_err_log_dump_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + return intel_guc_log_dump(m->private, &p, true); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump); + +static int guc_log_level_get(void *data, u64 *val) +{ + struct intel_guc_log *log = data; + + if (!intel_guc_is_used(log_to_guc(log))) + return -ENODEV; + + *val = intel_guc_log_get_level(log); + + return 0; +} + +static int guc_log_level_set(void *data, u64 val) +{ + struct intel_guc_log *log = data; + + if (!intel_guc_is_used(log_to_guc(log))) + return -ENODEV; + + return 
intel_guc_log_set_level(log, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(guc_log_level_fops, + guc_log_level_get, guc_log_level_set, + "%lld\n"); + +static int guc_log_relay_open(struct inode *inode, struct file *file) +{ + struct intel_guc_log *log = inode->i_private; + + if (!intel_guc_is_ready(log_to_guc(log))) + return -ENODEV; + + file->private_data = log; + + return intel_guc_log_relay_open(log); +} + +static ssize_t +guc_log_relay_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct intel_guc_log *log = filp->private_data; + int val; + int ret; + + ret = kstrtoint_from_user(ubuf, cnt, 0, &val); + if (ret < 0) + return ret; + + /* + * Enable and start the guc log relay on value of 1. + * Flush log relay for any other value. + */ + if (val == 1) + ret = intel_guc_log_relay_start(log); + else + intel_guc_log_relay_flush(log); + + return ret ?: cnt; +} + +static int guc_log_relay_release(struct inode *inode, struct file *file) +{ + struct intel_guc_log *log = inode->i_private; + + intel_guc_log_relay_close(log); + return 0; +} + +static const struct file_operations guc_log_relay_fops = { + .owner = THIS_MODULE, + .open = guc_log_relay_open, + .write = guc_log_relay_write, + .release = guc_log_relay_release, +}; + +void intel_guc_log_debugfs_register(struct intel_guc_log *log, + struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "guc_log_dump", &guc_log_dump_fops, NULL }, + { "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL }, + { "guc_log_level", &guc_log_level_fops, NULL }, + { "guc_log_relay", &guc_log_relay_fops, NULL }, + }; + + if (!intel_guc_is_supported(log_to_guc(log))) + return; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), log); +} + diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h new file mode 100644 index 000000000000..e8900e3d74ea --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef DEBUGFS_GUC_LOG_H +#define DEBUGFS_GUC_LOG_H + +struct intel_guc_log; +struct dentry; + +void intel_guc_log_debugfs_register(struct intel_guc_log *log, + struct dentry *root); + +#endif /* DEBUGFS_GUC_LOG_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9e42324fdecd..94eb63f309ce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ctx_desc = rq->context->lrc.ccid; u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, @@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine, static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority | __NO_PREEMPTION; + return rq->sched.attr.priority; } static struct i915_request *schedule_in(struct i915_request *rq, int idx) @@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. 
*/ list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -660,12 +658,9 @@ void intel_guc_submission_disable(struct intel_guc *guc) guc_proc_desc_fini(guc); } -static bool __guc_submission_support(struct intel_guc *guc) +static bool __guc_submission_selected(struct intel_guc *guc) { - /* XXX: GuC submission is unavailable for now */ - return false; - - if (!intel_guc_is_supported(guc)) + if (!intel_guc_submission_is_supported(guc)) return false; return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; @@ -673,7 +668,7 @@ static bool __guc_submission_support(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { - guc->submission_supported = __guc_submission_support(guc); + guc->submission_selected = __guc_submission_selected(guc); } bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index e402a2932592..4cf9d3e50263 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -8,7 +8,8 @@ #include <linux/types.h> -struct intel_guc; +#include "intel_guc.h" + struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); @@ -20,4 +21,20 @@ int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); +static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) +{ + /* XXX: GuC submission is unavailable for now */ + return false; +} + +static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc) +{ + return guc->submission_selected; +} + +static inline bool intel_guc_submission_is_used(struct intel_guc *guc) +{ + return intel_guc_is_used(guc) && intel_guc_submission_is_wanted(guc); +} + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 32a069841c14..65eeb44b397d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -41,7 +41,7 @@ void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; - intel_huc_fw_init_early(huc); + intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC); if (INTEL_GEN(i915) >= 11) { huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO; @@ -121,19 +121,20 @@ int intel_huc_init(struct intel_huc *huc) if (err) goto out_fini; + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE); + return 0; out_fini: intel_uc_fw_fini(&huc->fw); out: - intel_uc_fw_cleanup_fetch(&huc->fw); - DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err); + i915_probe_error(i915, "failed with %d\n", err); return err; } void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_available(&huc->fw)) + if (!intel_uc_fw_is_loadable(&huc->fw)) return; intel_huc_rsa_data_destroy(huc); @@ -199,9 +200,13 @@ fail: * This function reads status register to verify if HuC * firmware was successfully loaded. * - * Returns: 1 if HuC firmware is loaded and verified, - * 0 if HuC firmware is not loaded and -ENODEV if HuC - * is not present on this platform. 
+ * Returns: + * * -ENODEV if HuC is not present on this platform, + * * -EOPNOTSUPP if HuC firmware is disabled, + * * -ENOPKG if HuC firmware was not installed, + * * -ENOEXEC if HuC firmware is invalid or mismatched, + * * 0 if HuC firmware is not running, + * * 1 if HuC firmware is authenticated and running. */ int intel_huc_check_status(struct intel_huc *huc) { @@ -209,11 +214,50 @@ int intel_huc_check_status(struct intel_huc *huc) intel_wakeref_t wakeref; u32 status = 0; - if (!intel_huc_is_supported(huc)) + switch (__intel_uc_fw_status(&huc->fw)) { + case INTEL_UC_FIRMWARE_NOT_SUPPORTED: return -ENODEV; + case INTEL_UC_FIRMWARE_DISABLED: + return -EOPNOTSUPP; + case INTEL_UC_FIRMWARE_MISSING: + return -ENOPKG; + case INTEL_UC_FIRMWARE_ERROR: + return -ENOEXEC; + default: + break; + } with_intel_runtime_pm(gt->uncore->rpm, wakeref) status = intel_uncore_read(gt->uncore, huc->status.reg); return (status & huc->status.mask) == huc->status.value; } + +/** + * intel_huc_load_status - dump information about HuC load status + * @huc: the HuC + * @p: the &drm_printer + * + * Pretty printer for HuC load status. + */ +void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p) +{ + struct intel_gt *gt = huc_to_gt(huc); + intel_wakeref_t wakeref; + + if (!intel_huc_is_supported(huc)) { + drm_printf(p, "HuC not supported\n"); + return; + } + + if (!intel_huc_is_wanted(huc)) { + drm_printf(p, "HuC disabled\n"); + return; + } + + intel_uc_fw_dump(&huc->fw, p); + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + drm_printf(p, "HuC status: 0x%08x\n", + intel_uncore_read(gt->uncore, huc->status.reg)); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 644c059fe01d..daee43b661d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,14 +41,22 @@ static inline bool intel_huc_is_supported(struct intel_huc *huc) return intel_uc_fw_is_supported(&huc->fw); } -static inline bool intel_huc_is_enabled(struct intel_huc *huc) +static inline bool intel_huc_is_wanted(struct intel_huc *huc) { return intel_uc_fw_is_enabled(&huc->fw); } +static inline bool intel_huc_is_used(struct intel_huc *huc) +{ + GEM_BUG_ON(__intel_uc_fw_status(&huc->fw) == INTEL_UC_FIRMWARE_SELECTED); + return intel_uc_fw_is_available(&huc->fw); +} + static inline bool intel_huc_is_authenticated(struct intel_huc *huc) { return intel_uc_fw_is_running(&huc->fw); } +void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c new file mode 100644 index 000000000000..5733c15fd123 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <drm/drm_print.h> + +#include "gt/debugfs_gt.h" +#include "intel_huc.h" +#include "intel_huc_debugfs.h" + +static int huc_info_show(struct seq_file *m, void *data) +{ + struct intel_huc *huc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_huc_is_supported(huc)) + return -ENODEV; + + intel_huc_load_status(huc, &p); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info); + +void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "huc_info", &huc_info_fops, NULL }, + }; + + if (!intel_huc_is_supported(huc)) + return; + + 
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), huc); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h new file mode 100644 index 000000000000..be79e992f976 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef DEBUGFS_HUC_H +#define DEBUGFS_HUC_H + +struct intel_huc; +struct dentry; + +void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root); + +#endif /* DEBUGFS_HUC_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index eee193bf2cc4..e5ef509c70e8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -8,23 +8,6 @@ #include "i915_drv.h" /** - * intel_huc_fw_init_early() - initializes HuC firmware struct - * @huc: intel_huc struct - * - * On platforms with HuC selects firmware for uploading - */ -void intel_huc_fw_init_early(struct intel_huc *huc) -{ - struct intel_gt *gt = huc_to_gt(huc); - struct intel_uc *uc = &gt->uc; - struct drm_i915_private *i915 = gt->i915; - - intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, - intel_uc_uses_guc(uc), - INTEL_INFO(i915)->platform, INTEL_REVID(i915)); -} - -/** * intel_huc_fw_upload() - load HuC uCode to device * @huc: intel_huc structure * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h index b791269ce923..12f264ee3e0b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h @@ -8,7 +8,6 @@ struct intel_huc; -void intel_huc_fw_init_early(struct intel_huc *huc); int intel_huc_fw_upload(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index affc4d6f9ead..f518fe05c6f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -45,43 +45,43 @@ static void __confirm_options(struct intel_uc *uc) { struct drm_i915_private *i915 = uc_to_gt(uc)->i915; - DRM_DEV_DEBUG_DRIVER(i915->drm.dev, - "enable_guc=%d (guc:%s submission:%s huc:%s)\n", - i915_modparams.enable_guc, - yesno(intel_uc_uses_guc(uc)), - yesno(intel_uc_uses_guc_submission(uc)), - yesno(intel_uc_uses_huc(uc))); + drm_dbg(&i915->drm, + "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_wants_guc(uc)), + yesno(intel_uc_wants_guc_submission(uc)), + yesno(intel_uc_wants_huc(uc))); if (i915_modparams.enable_guc == -1) return; if (i915_modparams.enable_guc == 0) { - GEM_BUG_ON(intel_uc_uses_guc(uc)); - GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); - GEM_BUG_ON(intel_uc_uses_huc(uc)); + GEM_BUG_ON(intel_uc_wants_guc(uc)); + GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); + GEM_BUG_ON(intel_uc_wants_huc(uc)); return; } if (!intel_uc_supports_guc(uc)) - dev_info(i915->drm.dev, + drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915_modparams.enable_guc, "GuC is not supported!"); if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC && !intel_uc_supports_huc(uc)) - dev_info(i915->drm.dev, + drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915_modparams.enable_guc, "HuC is not supported!"); if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) - dev_info(i915->drm.dev, + drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915_modparams.enable_guc,
"GuC submission is N/A"); if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION | ENABLE_GUC_LOAD_HUC)) - dev_info(i915->drm.dev, + drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", i915_modparams.enable_guc, "undocumented flag"); } @@ -93,7 +93,7 @@ void intel_uc_init_early(struct intel_uc *uc) __confirm_options(uc); - if (intel_uc_uses_guc(uc)) + if (intel_uc_wants_guc(uc)) uc->ops = &uc_ops_on; else uc->ops = &uc_ops_off; @@ -131,6 +131,13 @@ static void __uc_free_load_err_log(struct intel_uc *uc) i915_gem_object_put(log); } +void intel_uc_driver_remove(struct intel_uc *uc) +{ + intel_uc_fini_hw(uc); + intel_uc_fini(uc); + __uc_free_load_err_log(uc); +} + static inline bool guc_communication_enabled(struct intel_guc *guc) { return intel_guc_ct_enabled(&guc->ct); @@ -257,13 +264,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc) { int err; - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); err = intel_uc_fw_fetch(&uc->guc.fw); if (err) return; - if (intel_uc_uses_huc(uc)) + if (intel_uc_wants_huc(uc)) intel_uc_fw_fetch(&uc->huc.fw); } @@ -273,33 +280,44 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc) intel_uc_fw_cleanup_fetch(&uc->guc.fw); } -static void __uc_init(struct intel_uc *uc) +static int __uc_init(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret; - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); + + if (!intel_uc_uses_guc(uc)) + return 0; + + if (i915_inject_probe_failure(uc_to_gt(uc)->i915)) + return -ENOMEM; /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); + GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); ret = intel_guc_init(guc); - if (ret) { - intel_uc_fw_cleanup_fetch(&huc->fw); - return; + if (ret) + return ret; + + if (intel_uc_uses_huc(uc)) { + ret = intel_huc_init(huc); + if (ret) + goto out_guc; } - if (intel_uc_uses_huc(uc)) - intel_huc_init(huc); + return 0; + +out_guc: + intel_guc_fini(guc); + return ret; } static void __uc_fini(struct intel_uc *uc) { intel_huc_fini(&uc->huc); intel_guc_fini(&uc->guc); - - __uc_free_load_err_log(uc); } static int __uc_sanitize(struct intel_uc *uc) @@ -402,12 +420,12 @@ static int __uc_init_hw(struct intel_uc *uc) int ret, attempts; GEM_BUG_ON(!intel_uc_supports_guc(uc)); - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); - if (!intel_uc_fw_is_available(&guc->fw)) { + if (!intel_uc_fw_is_loadable(&guc->fw)) { ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || - intel_uc_supports_guc_submission(uc) ? + intel_uc_wants_guc_submission(uc) ? 
intel_uc_fw_status_to_error(guc->fw.status) : 0; goto err_out; } @@ -459,17 +477,17 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_communication; - if (intel_uc_supports_guc_submission(uc)) + if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); - dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, guc->fw.major_ver_found, guc->fw.minor_ver_found, "submission", - enableddisabled(intel_uc_supports_guc_submission(uc))); + enableddisabled(intel_uc_uses_guc_submission(uc))); if (intel_uc_uses_huc(uc)) { - dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", + drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), huc->fw.path, huc->fw.major_ver_found, huc->fw.minor_ver_found, @@ -490,7 +508,7 @@ err_out: __uc_sanitize(uc); if (!ret) { - dev_notice(i915->drm.dev, "GuC is uninitialized\n"); + drm_notice(&i915->drm, "GuC is uninitialized\n"); /* We want to run without GuC submission */ return 0; } @@ -508,7 +526,7 @@ static void __uc_fini_hw(struct intel_uc *uc) if (!intel_guc_is_fw_running(guc)) return; - if (intel_uc_supports_guc_submission(uc)) + if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_disable(guc); if (guc_communication_enabled(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 49c913524686..9c954c589edf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -7,6 +7,7 @@ #define _INTEL_UC_H_ #include "intel_guc.h" +#include "intel_guc_submission.h" #include "intel_huc.h" #include "i915_params.h" @@ -16,7 +17,7 @@ struct intel_uc_ops { int (*sanitize)(struct intel_uc *uc); void (*init_fw)(struct intel_uc *uc); void (*fini_fw)(struct intel_uc *uc); - void (*init)(struct intel_uc *uc); + int (*init)(struct intel_uc *uc); void (*fini)(struct intel_uc *uc); int (*init_hw)(struct intel_uc *uc); void (*fini_hw)(struct intel_uc *uc); @@ -33,6 +34,7 @@ struct intel_uc { void intel_uc_init_early(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); +void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); @@ -40,35 +42,44 @@ void intel_uc_runtime_suspend(struct intel_uc *uc); int intel_uc_resume(struct intel_uc *uc); int intel_uc_runtime_resume(struct intel_uc *uc); -static inline bool intel_uc_supports_guc(struct intel_uc *uc) -{ - return intel_guc_is_supported(&uc->guc); -} - -static inline bool intel_uc_uses_guc(struct intel_uc *uc) -{ - return intel_guc_is_enabled(&uc->guc); -} +/* + * We need to know as early as possible if we're going to use GuC or not to + * take the correct setup paths. Additionally, once we've started loading the + * GuC, it is unsafe to keep executing without it because some parts of the HW, + * a subset of which is not cleaned on GT reset, will start expecting the GuC FW + * to be running. + * To solve both these requirements, we commit to using the microcontrollers if + * the relevant modparam is set and the blobs are found on the system. At this + * stage, the only thing that can stop us from attempting to load the blobs on + * the HW and use them is a fundamental issue (e.g. 
no memory for our + * structures); if we hit such a problem during driver load we're broken even + * without GuC, so there is no point in trying to fall back. + * + * Given the above, we can be in one of 4 states, with the last one implying + * we're committed to using the microcontroller: + * - Not supported: not available in HW and/or firmware not defined. + * - Supported: available in HW and firmware defined. + * - Wanted: supported + enabled in modparam. + * - In use: wanted + firmware found on the system and successfully fetched. + */ -static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc) -{ - return intel_guc_is_submission_supported(&uc->guc); +#define __uc_state_checker(x, func, state, required) \ +static inline bool intel_uc_##state##_##func(struct intel_uc *uc) \ +{ \ + return intel_##func##_is_##required(&uc->x); \ } -static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc) -{ - return intel_guc_is_submission_supported(&uc->guc); -} +#define uc_state_checkers(x, func) \ +__uc_state_checker(x, func, supports, supported) \ +__uc_state_checker(x, func, wants, wanted) \ +__uc_state_checker(x, func, uses, used) -static inline bool intel_uc_supports_huc(struct intel_uc *uc) -{ - return intel_uc_supports_guc(uc); -} +uc_state_checkers(guc, guc); +uc_state_checkers(huc, huc); +uc_state_checkers(guc, guc_submission); -static inline bool intel_uc_uses_huc(struct intel_uc *uc) -{ - return intel_huc_is_enabled(&uc->huc); -} +#undef uc_state_checkers +#undef __uc_state_checker #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ @@ -80,7 +91,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ intel_uc_ops_function(sanitize, sanitize, int, 0); intel_uc_ops_function(fetch_firmwares, init_fw, void, ); intel_uc_ops_function(cleanup_firmwares, fini_fw, void, ); -intel_uc_ops_function(init, init, void, ); +intel_uc_ops_function(init, init, int, 0); intel_uc_ops_function(fini, fini, void, ); intel_uc_ops_function(init_hw, init_hw, int, 0); intel_uc_ops_function(fini_hw, fini_hw, void, ); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c new file mode 100644 index 000000000000..9d16b784aa0d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "intel_guc_debugfs.h" +#include "intel_huc_debugfs.h" +#include "intel_uc.h" +#include "intel_uc_debugfs.h" + +void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) +{ + struct dentry *root; + + if (!gt_root) + return; + + /* GuC and HuC go always in pair, no need to check both */ + if (!intel_uc_supports_guc(uc)) + return; + + root = debugfs_create_dir("uc", gt_root); + if (IS_ERR(root)) + return; + + intel_guc_debugfs_register(&uc->guc, root); + intel_huc_debugfs_register(&uc->huc, root); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h new file mode 100644 index 000000000000..010ce250d223 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef DEBUGFS_UC_H +#define DEBUGFS_UC_H + +struct intel_uc; +struct dentry; + +void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root); + +#endif /* DEBUGFS_UC_H */ diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 8ee0a0c7f447..e1caae93996d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -11,26 +11,32 @@ #include "intel_uc_fw_abi.h" #include "i915_drv.h" -static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) +static inline struct intel_gt * +____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) { - GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); - if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) + if (type == INTEL_UC_FW_TYPE_GUC) return container_of(uc_fw, struct intel_gt, uc.guc.fw); - GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC); + GEM_BUG_ON(type != INTEL_UC_FW_TYPE_HUC); return container_of(uc_fw, struct intel_gt, uc.huc.fw); } +static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw) +{ + GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED); + return ____uc_fw_to_gt(uc_fw, uc_fw->type); +} + #ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status) { uc_fw->__status = status; - DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev, - "%s firmware -> %s\n", - intel_uc_fw_type_repr(uc_fw->type), - status == INTEL_UC_FIRMWARE_SELECTED ? - uc_fw->path : intel_uc_fw_status_repr(status)); + drm_dbg(&__uc_fw_to_gt(uc_fw)->i915->drm, + "%s firmware -> %s\n", + intel_uc_fw_type_repr(uc_fw->type), + status == INTEL_UC_FIRMWARE_SELECTED ? + uc_fw->path : intel_uc_fw_status_repr(status)); } #endif @@ -43,7 +49,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * features. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ @@ -187,17 +193,15 @@ static void __uc_fw_user_override(struct intel_uc_fw *uc_fw) * intel_uc_fw_init_early - initialize the uC object and select the firmware * @uc_fw: uC firmware * @type: type of uC - * @supported: is uC support possible - * @platform: platform identifier - * @rev: hardware revision * * Initialize the state of our uC object and relevant tracking and select the * firmware to fetch and load. 
*/ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, - enum intel_uc_fw_type type, bool supported, - enum intel_platform platform, u8 rev) + enum intel_uc_fw_type type) { + struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915; + /* * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status * before we're looked at the HW caps to see if we have uc support @@ -208,8 +212,10 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, uc_fw->type = type; - if (supported) { - __uc_fw_auto_select(uc_fw, platform, rev); + if (HAS_GT_UC(i915)) { + __uc_fw_auto_select(uc_fw, + INTEL_INFO(i915)->platform, + INTEL_REVID(i915)); __uc_fw_user_override(uc_fw); } @@ -279,7 +285,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) err = i915_inject_probe_error(i915, -ENXIO); if (err) - return err; + goto fail; __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); @@ -290,7 +296,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* Check the size of the blob before examining buffer contents */ if (unlikely(fw->size < sizeof(struct uc_css_header))) { - dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n", + drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, sizeof(struct uc_css_header)); err = -ENODATA; @@ -303,7 +309,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - css->exponent_size_dw) * sizeof(u32); if (unlikely(size != sizeof(struct uc_css_header))) { - dev_warn(dev, + drm_warn(&i915->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, sizeof(struct uc_css_header)); @@ -316,7 +322,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* now RSA */ if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) { - dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n", + drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, css->key_size_dw, UOS_RSA_SCRATCH_COUNT); err = -EPROTO; @@ -327,7 +333,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* At least, it should have header, uCode and RSA. Size of all three. 
*/ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; if (unlikely(fw->size < size)) { - dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n", + drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, size); err = -ENOEXEC; @@ -337,7 +343,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* Sanity check whether this fw is not larger than whole WOPCM memory */ size = __intel_uc_fw_get_upload_size(uc_fw); if (unlikely(size >= i915->wopcm.size)) { - dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n", + drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, size, (size_t)i915->wopcm.size); err = -E2BIG; @@ -352,7 +358,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); @@ -380,9 +386,9 @@ fail: INTEL_UC_FIRMWARE_MISSING : INTEL_UC_FIRMWARE_ERROR); - dev_notice(dev, "%s firmware %s: fetch failed with error %d\n", + drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - dev_info(dev, "%s firmware(s) can be downloaded from %s\n", + drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ @@ -467,7 +473,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) /* Wait for DMA to finish */ ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100); if (ret) - dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n", + drm_err(&gt->i915->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", intel_uc_fw_type_repr(uc_fw->type), intel_uncore_read_fw(uncore, DMA_CTRL)); @@ -501,7 +507,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) if (err) return err; - if (!intel_uc_fw_is_available(uc_fw)) + if (!intel_uc_fw_is_loadable(uc_fw)) return -ENOEXEC; /* Call custom loader */ @@ -544,7 +550,10 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { - intel_uc_fw_cleanup_fetch(uc_fw); + if (i915_gem_object_has_pinned_pages(uc_fw->obj)) + i915_gem_object_unpin_pages(uc_fw->obj); + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 1f30543d0d2d..23d3a423ac0f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -29,8 +29,11 @@ struct intel_gt; * | | SELECTED | * +------------+- / | \ -+ * | | MISSING <--/ | \--> ERROR | - * | fetch | | | - * | | /------> AVAILABLE <---<-----------\ | + * | fetch | V | + * | | AVAILABLE | + * +------------+- | -+ + * | init | V | + * | | /------> LOADABLE <----<-----------\ | * +------------+- \ / \ \ \ -+ * | | FAIL <--< \--> TRANSFERRED \ | * | upload | \ / \ / | @@ -46,6 +49,7 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ @@ -115,6 +119,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "ERROR"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_LOADABLE: + return "LOADABLE"; case INTEL_UC_FIRMWARE_FAIL: return "FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: @@ -143,6 +149,7 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) case INTEL_UC_FIRMWARE_SELECTED: return -ESTALE; case INTEL_UC_FIRMWARE_AVAILABLE: + case INTEL_UC_FIRMWARE_LOADABLE: case INTEL_UC_FIRMWARE_TRANSFERRED: case INTEL_UC_FIRMWARE_RUNNING: return 0; @@ -184,6 +191,11 @@ static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw) return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE; } +static inline bool intel_uc_fw_is_loadable(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_LOADABLE; +} + static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw) { return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED; @@ -202,7 +214,7 @@ static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) { if (intel_uc_fw_is_loaded(uc_fw)) - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOADABLE); } static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) @@ -227,8 +239,7 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) } void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, - enum intel_uc_fw_type type, bool supported, - enum intel_platform platform, u8 rev); + enum intel_uc_fw_type type); int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw); void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags); |
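
The reworked kernel-doc for intel_huc_check_status() in the diff above turns a bare -ENODEV into a tri-state result: a negative errno says why HuC cannot run, 0 means not running, 1 means authenticated and running. The stand-alone C sketch below only illustrates how a caller might interpret those values; the huc_status_to_string() helper and the sample values in main() are hypothetical, not driver code.

/* Illustrative sketch: interpreting the tri-state result documented for
 * intel_huc_check_status(). Only the meaning of the return values mirrors
 * the kernel-doc above; this helper itself is hypothetical. */
#include <errno.h>
#include <stdio.h>

static const char *huc_status_to_string(int status)
{
	if (status == 1)
		return "authenticated and running";
	if (status == 0)
		return "not running";

	switch (-status) {
	case ENODEV:		return "HuC not present on this platform";
	case EOPNOTSUPP:	return "HuC firmware disabled";
	case ENOPKG:		return "HuC firmware not installed";
	case ENOEXEC:		return "HuC firmware invalid or mismatched";
	default:		return "unknown error";
	}
}

int main(void)
{
	/* Sample values a caller might see; in the driver these would come
	 * from intel_huc_check_status(). */
	int samples[] = { 1, 0, -ENODEV, -EOPNOTSUPP, -ENOPKG, -ENOEXEC };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%d -> %s\n", samples[i], huc_status_to_string(samples[i]));

	return 0;
}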
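
__uc_init() in the diff now reports failure instead of swallowing it: GuC is initialized first, the optional HuC step follows, and the GuC step is unwound if HuC init fails. Below is a generic, stand-alone sketch of that goto-based unwind ordering; init_guc(), init_huc() and fini_guc() are made-up stand-ins, not the i915 functions.

/* Illustrative sketch of the unwind ordering used by __uc_init(): the
 * earlier step is torn down when a later, optional step fails. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int  init_guc(void) { printf("guc init\n"); return 0; }
static void fini_guc(void) { printf("guc fini\n"); }
static int  init_huc(void) { printf("huc init\n"); return -ENOMEM; /* simulate failure */ }

static int uc_init(bool use_huc)
{
	int ret;

	ret = init_guc();
	if (ret)
		return ret;

	if (use_huc) {
		ret = init_huc();
		if (ret)
			goto out_guc;
	}

	return 0;

out_guc:
	fini_guc();	/* undo the earlier step so nothing is left half set up */
	return ret;
}

int main(void)
{
	printf("uc_init() = %d\n", uc_init(true));
	return 0;
}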
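
The supports/wants/uses split added to intel_uc.h is generated by the __uc_state_checker()/uc_state_checkers() macros shown in the diff. The sketch below replays the same expansion in plain user-space C so the generated names are visible; the fake_* types and predicates are placeholders standing in for the real intel_uc/intel_guc structures, so this only demonstrates how the macro expands, not the driver code itself.

/* Illustrative sketch (user-space, stub types): how uc_state_checkers()
 * expands into the supports/wants/uses wrappers. */
#include <stdbool.h>
#include <stdio.h>

struct fake_guc { bool supported, wanted, used; };
struct fake_uc { struct fake_guc guc; };

static inline bool fake_guc_is_supported(struct fake_guc *guc) { return guc->supported; }
static inline bool fake_guc_is_wanted(struct fake_guc *guc)    { return guc->wanted; }
static inline bool fake_guc_is_used(struct fake_guc *guc)      { return guc->used; }

/* Same shape as __uc_state_checker()/uc_state_checkers() in the patch. */
#define __uc_state_checker(x, func, state, required) \
static inline bool fake_uc_##state##_##func(struct fake_uc *uc) \
{ \
	return fake_##func##_is_##required(&uc->x); \
}

#define uc_state_checkers(x, func) \
__uc_state_checker(x, func, supports, supported) \
__uc_state_checker(x, func, wants, wanted) \
__uc_state_checker(x, func, uses, used)

uc_state_checkers(guc, guc) /* generates fake_uc_supports_guc() and friends */

#undef uc_state_checkers
#undef __uc_state_checker

int main(void)
{
	struct fake_uc uc = { .guc = { .supported = true, .wanted = true, .used = false } };

	/* "wanted" does not imply "used": the blob may be missing on the system. */
	printf("supports=%d wants=%d uses=%d\n",
	       fake_uc_supports_guc(&uc), fake_uc_wants_guc(&uc), fake_uc_uses_guc(&uc));
	return 0;
}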
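
intel_uc_fw_init_early() no longer takes the platform, revision and support flag as parameters; ____uc_fw_to_gt() recovers the owning GT with container_of() on the embedded firmware struct, keyed by the type argument because uc_fw->type has not been set yet at that point. The self-contained sketch below shows the container_of() idiom on simplified stand-in structs; the struct names and layout are placeholders, not the i915 layout.

/* Illustrative sketch: recovering the containing object from a pointer to an
 * embedded member, as ____uc_fw_to_gt() does for guc.fw / huc.fw. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

enum fw_type { FW_TYPE_GUC, FW_TYPE_HUC };

struct fw  { const char *path; };
struct guc { struct fw fw; };
struct huc { struct fw fw; };
struct uc  { struct guc guc; struct huc huc; };
struct gt  { int id; struct uc uc; };

/* Pick the container based on the caller-supplied type, since fw->type is
 * not initialized yet when init_early() runs. */
static struct gt *fw_to_gt(struct fw *fw, enum fw_type type)
{
	if (type == FW_TYPE_GUC)
		return container_of(fw, struct gt, uc.guc.fw);
	return container_of(fw, struct gt, uc.huc.fw);
}

int main(void)
{
	struct gt gt = { .id = 0 };

	printf("guc fw belongs to gt%d\n", fw_to_gt(&gt.uc.guc.fw, FW_TYPE_GUC)->id);
	printf("huc fw belongs to gt%d\n", fw_to_gt(&gt.uc.huc.fw, FW_TYPE_HUC)->id);
	return 0;
}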
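
The new INTEL_UC_FIRMWARE_LOADABLE state is inserted between AVAILABLE and FAIL, so the existing ordering-based helpers keep working and the stricter intel_uc_fw_is_loadable() check can sit next to intel_uc_fw_is_available(). The sketch below models that ordering in plain C: the enumerators from MISSING onward mirror the hunk above (earlier states are deliberately omitted), while main() and the renamed helpers are illustrative only.

/* Illustrative sketch: why the >= comparisons in intel_uc_fw.h still hold
 * after inserting LOADABLE between AVAILABLE and FAIL. */
#include <stdbool.h>
#include <stdio.h>

enum uc_fw_status {
	/* earlier states (not supported, disabled, selected, ...) omitted */
	UC_FIRMWARE_MISSING,		/* blob not found on the system */
	UC_FIRMWARE_ERROR,		/* invalid format or version */
	UC_FIRMWARE_AVAILABLE,		/* blob found and copied in mem */
	UC_FIRMWARE_LOADABLE,		/* all fw-required objects are ready */
	UC_FIRMWARE_FAIL,		/* failed to xfer or init/auth the fw */
	UC_FIRMWARE_TRANSFERRED,	/* dma xfer done */
	UC_FIRMWARE_RUNNING		/* init/auth done */
};

/* Same shape as the inline helpers in intel_uc_fw.h. */
static bool fw_is_available(enum uc_fw_status s) { return s >= UC_FIRMWARE_AVAILABLE; }
static bool fw_is_loadable(enum uc_fw_status s)  { return s >= UC_FIRMWARE_LOADABLE; }
static bool fw_is_loaded(enum uc_fw_status s)    { return s >= UC_FIRMWARE_TRANSFERRED; }

int main(void)
{
	/* After fetch the blob is only AVAILABLE: upload must not proceed yet. */
	enum uc_fw_status s = UC_FIRMWARE_AVAILABLE;
	printf("available=%d loadable=%d loaded=%d\n",
	       fw_is_available(s), fw_is_loadable(s), fw_is_loaded(s));

	/* After init the required objects exist: the fw may now be uploaded. */
	s = UC_FIRMWARE_LOADABLE;
	printf("available=%d loadable=%d loaded=%d\n",
	       fw_is_available(s), fw_is_loadable(s), fw_is_loaded(s));
	return 0;
}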