diff options
62 files changed, 6154 insertions, 5873 deletions
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 2fe5952e90f1..87aaffc22920 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -70,6 +70,9 @@ Frontbuffer Tracking .. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c :doc: frontbuffer tracking +.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h + :internal: + .. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c :internal: diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 684fc1cd08fa..dda724f04445 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -25,7 +25,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_context.o \ - i915_gem_debug.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ @@ -33,6 +32,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_gtt.o \ i915_gem.o \ i915_gem_render_state.o \ + i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ @@ -40,6 +40,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_trace_points.o \ intel_breadcrumbs.o \ + intel_engine_cs.o \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b0fd6a7b0603..1db829c8b912 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -62,23 +62,23 @@ * The parser always rejects such commands. * * The majority of the problematic commands fall in the MI_* range, with only a - * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). + * few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW). * * Implementation: - * Each ring maintains tables of commands and registers which the parser uses in - * scanning batch buffers submitted to that ring. + * Each engine maintains tables of commands and registers which the parser + * uses in scanning batch buffers submitted to that engine. * * Since the set of commands that the parser must check for is significantly * smaller than the number of commands supported, the parser tables contain only * those commands required by the parser. This generally works because command * opcode ranges have standard command length encodings. So for commands that * the parser does not need to check, it can easily skip them. This is - * implemented via a per-ring length decoding vfunc. + * implemented via a per-engine length decoding vfunc. * * Unfortunately, there are a number of commands that do not follow the standard * length encoding for their opcode range, primarily amongst the MI_* commands. * To handle this, the parser provides a way to define explicit "skip" entries - * in the per-ring command tables. + * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories * mentioned above: rejected, master-only, register whitelist. The parser @@ -603,7 +603,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_engine_cs *engine, +static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) { @@ -624,8 +624,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, u32 curr = desc->cmd.value & desc->cmd.mask; if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", - engine->id, i, j, curr, previous); + DRM_ERROR("CMD: %s [%d] command table not sorted: " + "table=%d entry=%d cmd=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, j, curr, previous); ret = false; } @@ -636,7 +638,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, return ret; } -static bool check_sorted(int ring_id, +static bool check_sorted(const struct intel_engine_cs *engine, const struct drm_i915_reg_descriptor *reg_table, int reg_count) { @@ -648,8 +650,10 @@ static bool check_sorted(int ring_id, u32 curr = i915_mmio_reg_offset(reg_table[i].addr); if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", - ring_id, i, curr, previous); + DRM_ERROR("CMD: %s [%d] register table not sorted: " + "entry=%d reg=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, curr, previous); ret = false; } @@ -666,7 +670,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine) for (i = 0; i < engine->reg_table_count; i++) { table = &engine->reg_tables[i]; - if (!check_sorted(engine->id, table->regs, table->num_regs)) + if (!check_sorted(engine, table->regs, table->num_regs)) return false; } @@ -736,7 +740,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) } /** - * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer + * intel_engine_init_cmd_parser() - set cmd parser related fields for an engine * @engine: the engine to initialize * * Optionally initializes fields related to batch buffer command parsing in the @@ -745,7 +749,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) * * Return: non-zero if initialization fails */ -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; @@ -806,8 +810,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: - DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", - engine->id); + MISSING_CASE(engine->id); BUG(); } @@ -829,13 +832,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) } /** - * i915_cmd_parser_fini_ring() - clean up cmd parser related fields + * intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields * @engine: the engine to clean up * * Releases any resources related to command parsing that may have been - * initialized for the specified ring. + * initialized for the specified engine. */ -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine) +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return; @@ -866,9 +869,9 @@ find_cmd_in_table(struct intel_engine_cs *engine, * Returns a pointer to a descriptor for the command specified by cmd_header. * * The caller must supply space for a default descriptor via the default_desc - * parameter. If no descriptor for the specified command exists in the ring's + * parameter. If no descriptor for the specified command exists in the engine's * command parser tables, this function fills in default_desc based on the - * ring's default length encoding and returns default_desc. + * engine's default length encoding and returns default_desc. */ static const struct drm_i915_cmd_descriptor* find_cmd(struct intel_engine_cs *engine, @@ -1023,15 +1026,16 @@ unpin_src: } /** - * i915_needs_cmd_parser() - should a given ring use software command parsing? + * intel_engine_needs_cmd_parser() - should a given engine use software + * command parsing? * @engine: the engine in question * * Only certain platforms require software batch buffer command parsing, and * only when enabled via module parameter. * - * Return: true if the ring requires software command parsing + * Return: true if the engine requires software command parsing */ -bool i915_needs_cmd_parser(struct intel_engine_cs *engine) +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return false; @@ -1078,8 +1082,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, *cmd, engine->id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", + reg_addr, *cmd, engine->exec_id); return false; } @@ -1159,11 +1163,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->id); + dword, engine->exec_id); return false; } } @@ -1189,12 +1193,12 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master) { u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; @@ -1295,7 +1299,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv) { - if (i915_needs_cmd_parser(engine)) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9b03cb2813bd..f62285c1ed7f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data) static char get_active_flag(struct drm_i915_gem_object *obj) { - return obj->active ? '*' : ' '; + return i915_gem_object_is_active(obj) ? '*' : ' '; } static char get_pin_flag(struct drm_i915_gem_object *obj) @@ -101,7 +101,7 @@ static char get_pin_flag(struct drm_i915_gem_object *obj) static char get_tiling_flag(struct drm_i915_gem_object *obj) { - switch (obj->tiling_mode) { + switch (i915_gem_object_get_tiling(obj)) { default: case I915_TILING_NONE: return ' '; case I915_TILING_X: return 'X'; @@ -125,7 +125,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->is_ggtt && drm_mm_node_allocated(&vma->node)) + if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_engine_cs *engine; struct i915_vma *vma; + unsigned int frontbuffer_bits; int pin_count = 0; enum intel_engine_id id; @@ -155,17 +156,20 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[id])); + i915_gem_active_get_seqno(&obj->last_read[id], + &obj->base.dev->struct_mutex)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_active_get_seqno(&obj->last_write, + &obj->base.dev->struct_mutex), + i915_gem_active_get_seqno(&obj->last_fence, + &obj->base.dev->struct_mutex), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); @@ -174,10 +178,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", - vma->is_ggtt ? "g" : "pp", + i915_vma_is_ggtt(vma) ? "g" : "pp", vma->node.start, vma->node.size); - if (vma->is_ggtt) + if (i915_vma_is_ggtt(vma)) seq_printf(m, ", type: %u", vma->ggtt_view.type); seq_puts(m, ")"); } @@ -192,11 +199,15 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_engine(obj->last_write_req)->name); - if (obj->frontbuffer_bits) - seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); + + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); + if (engine) + seq_printf(m, " (%s)", engine->name); + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (frontbuffer_bits) + seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } static int i915_gem_object_list_info(struct seq_file *m, void *data) @@ -338,46 +349,29 @@ static int per_file_stats(int id, void *ptr, void *data) stats->count++; stats->total += obj->base.size; - + if (!obj->bind_count) + stats->unbound += obj->base.size; if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - if (USES_FULL_PPGTT(obj->base.dev)) { - list_for_each_entry(vma, &obj->vma_list, obj_link) { - struct i915_hw_ppgtt *ppgtt; - - if (!drm_mm_node_allocated(&vma->node)) - continue; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; - if (vma->is_ggtt) { - stats->global += obj->base.size; - continue; - } + if (i915_vma_is_ggtt(vma)) { + stats->global += vma->node.size; + } else { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->file_priv != stats->file_priv) + if (ppgtt->base.file != stats->file_priv) continue; - - if (obj->active) /* XXX per-vma statistic */ - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - - return 0; - } - } else { - if (i915_gem_obj_ggtt_bound(obj)) { - stats->global += obj->base.size; - if (obj->active) - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - return 0; } - } - if (!list_empty(&obj->global_list)) - stats->unbound += obj->base.size; + if (i915_vma_is_active(vma)) + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; + } return 0; } @@ -425,8 +419,8 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) per_file_stats(0, ctx->engine[n].state, data); - if (ctx->engine[n].ringbuf) - per_file_stats(0, ctx->engine[n].ringbuf->obj, data); + if (ctx->engine[n].ring) + per_file_stats(0, ctx->engine[n].ring->obj, data); } return 0; @@ -754,13 +748,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(req, &engine->request_list, list) + list_for_each_entry(req, &engine->request_list, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, list) { + list_for_each_entry(req, &engine->request_list, link) { struct task_struct *task; rcu_read_lock(); @@ -768,7 +762,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, " %x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "<unknown>", task ? task->pid : -1); @@ -1205,8 +1199,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - if (IS_GEN5(dev)) { u16 rgvswctl = I915_READ16(MEMSWCTL); u16 rgvstat = I915_READ16(MEMSTAT_ILK); @@ -1381,6 +1373,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); seq_printf(m, "Min freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); seq_printf(m, "Max freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); seq_printf(m, @@ -1419,7 +1413,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_engine_id(engine, dev_priv, id) { - acthd[id] = intel_ring_get_active_head(engine); + acthd[id] = intel_engine_get_active_head(engine); seqno[id] = intel_engine_get_seqno(engine); } @@ -1602,6 +1596,7 @@ static int gen6_drpc_info(struct seq_file *m) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0, ret; @@ -1629,6 +1624,10 @@ static int gen6_drpc_info(struct seq_file *m) rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); + if (INTEL_INFO(dev)->gen >= 9) { + gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); + gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); + } mutex_unlock(&dev->struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -1647,6 +1646,12 @@ static int gen6_drpc_info(struct seq_file *m) yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } seq_printf(m, "Deep RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); seq_printf(m, "Deepest RC6 Enabled: %s\n", @@ -1675,6 +1680,14 @@ static int gen6_drpc_info(struct seq_file *m) seq_printf(m, "Core Power Down: %s\n", yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + } /* Not exactly sure what this is */ seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n", @@ -1692,7 +1705,7 @@ static int gen6_drpc_info(struct seq_file *m) GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); seq_printf(m, "RC6++ voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); - return 0; + return i915_forcewake_domains(m, NULL); } static int i915_drpc_info(struct seq_file *m, void *unused) @@ -1896,8 +1909,6 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); if (ret) goto out; @@ -2019,12 +2030,11 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } -static void describe_ctx_ringbuf(struct seq_file *m, - struct intel_ringbuffer *ringbuf) +static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ringbuf->space, ringbuf->head, ringbuf->tail, - ringbuf->last_retired_head); + ring->space, ring->head, ring->tail, + ring->last_retired_head); } static int i915_context_status(struct seq_file *m, void *unused) @@ -2068,8 +2078,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) describe_obj(m, ce->state); - if (ce->ringbuf) - describe_ctx_ringbuf(m, ce->ringbuf); + if (ce->ring) + describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); } @@ -2467,13 +2477,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } - seq_printf(m, "Semaphore boosts: %d%s\n", - dev_priv->rps.semaphores.boosts, - list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); - seq_printf(m, "MMIO flip boosts: %d%s\n", - dev_priv->rps.mmioflips.boosts, - list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active"); - seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); spin_unlock(&dev_priv->rps.client_lock); mutex_unlock(&dev->filelist_mutex); @@ -3228,7 +3232,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) enum intel_engine_id id; int j, ret; - if (!i915_semaphore_is_enabled(dev_priv)) { + if (!i915.semaphores) { seq_puts(m, "Semaphores are disabled\n"); return 0; } @@ -3621,7 +3625,6 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, while (n_entries > 0) { struct intel_pipe_crc_entry *entry = &pipe_crc->entries[pipe_crc->tail]; - int ret; if (CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) @@ -3638,8 +3641,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, spin_unlock_irq(&pipe_crc->lock); - ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN); - if (ret == PIPE_CRC_LINE_LEN) + if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN)) return -EFAULT; user_buf += PIPE_CRC_LINE_LEN; @@ -4921,7 +4923,7 @@ i915_drop_caches_set(void *data, u64 val) return ret; if (val & DROP_ACTIVE) { - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto unlock; } @@ -4950,20 +4952,11 @@ i915_max_freq_get(void *data, u64 *val) { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@ -4978,8 +4971,6 @@ i915_max_freq_set(void *data, u64 val) if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@ -5017,20 +5008,11 @@ i915_min_freq_get(void *data, u64 *val) { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@ -5042,11 +5024,9 @@ i915_min_freq_set(void *data, u64 val) u32 hw_max, hw_min; int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@ -5268,7 +5248,8 @@ static void broadwell_sseu_device_status(struct drm_device *dev, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = to_i915(node->minor->dev); + struct drm_device *dev = &dev_priv->drm; struct sseu_dev_status stat; if (INTEL_INFO(dev)->gen < 8) @@ -5298,6 +5279,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&stat, 0, sizeof(stat)); + + intel_runtime_pm_get(dev_priv); + if (IS_CHERRYVIEW(dev)) { cherryview_sseu_device_status(dev, &stat); } else if (IS_BROADWELL(dev)) { @@ -5305,6 +5289,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused) } else if (INTEL_INFO(dev)->gen >= 9) { gen9_sseu_device_status(dev, &stat); } + + intel_runtime_pm_put(dev_priv); + seq_printf(m, " Enabled Slice Total: %u\n", stat.slice_total); seq_printf(m, " Enabled Subslice Total: %u\n", diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 40cd16cf9772..57eb380a2c21 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -228,27 +228,6 @@ static void intel_detect_pch(struct drm_device *dev) pci_dev_put(pch); } -bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6) - return false; - - if (i915.semaphores >= 0) - return i915.semaphores; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; - -#ifdef CONFIG_INTEL_IOMMU - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - static int i915_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -324,7 +303,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_semaphore_is_enabled(dev_priv); + value = i915.semaphores; break; case I915_PARAM_HAS_PRIME_VMAP_FLUSH: value = 1; @@ -999,6 +978,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915.enable_ppgtt = intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); + + i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); + DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); } /** @@ -1011,8 +993,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) static int i915_driver_init_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint32_t aperture_size; int ret; if (i915_inject_load_failure()) @@ -1022,16 +1002,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) intel_sanitize_options(dev_priv); - ret = i915_ggtt_init_hw(dev); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; - ret = i915_ggtt_enable_hw(dev); - if (ret) { - DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; - } - /* WARNING: Apparently we must kick fbdev drivers before vgacon, * otherwise the vga fbdev driver falls over. */ ret = i915_kick_out_firmware_fb(dev_priv); @@ -1046,6 +1020,16 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) goto out_ggtt; } + ret = i915_ggtt_init_hw(dev_priv); + if (ret) + return ret; + + ret = i915_ggtt_enable_hw(dev_priv); + if (ret) { + DRM_ERROR("failed to enable GGTT\n"); + goto out_ggtt; + } + pci_set_master(dev->pdev); /* overlay on gen2 is broken and can't address above 1G */ @@ -1058,7 +1042,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - /* 965GM sometimes incorrectly writes to hardware status page (HWS) * using 32bit addressing, overwriting memory if HWS is located * above 4GB. @@ -1077,19 +1060,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - aperture_size = ggtt->mappable_end; - - ggtt->mappable = - io_mapping_create_wc(ggtt->mappable_base, - aperture_size); - if (!ggtt->mappable) { - ret = -EIO; - goto out_ggtt; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, - aperture_size); - pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); @@ -1118,7 +1088,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) return 0; out_ggtt: - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); return ret; } @@ -1130,15 +1100,12 @@ out_ggtt: static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); pm_qos_remove_request(&dev_priv->pm_qos); - arch_phys_wc_del(ggtt->mtrr); - io_mapping_free(ggtt->mappable); - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); } /** @@ -1343,7 +1310,7 @@ void i915_driver_unload(struct drm_device *dev) i915_destroy_error_state(dev); /* Flush any outstanding unpin_work. */ - flush_workqueue(dev_priv->wq); + drain_workqueue(dev_priv->wq); intel_guc_fini(dev); i915_gem_fini(dev); @@ -1458,8 +1425,6 @@ static int i915_drm_suspend(struct drm_device *dev) intel_guc_suspend(dev); - intel_suspend_gt_powersave(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); @@ -1586,15 +1551,13 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(dev_priv); - ret = i915_ggtt_enable_hw(dev); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) DRM_ERROR("failed to re-enable GGTT\n"); intel_csr_ucode_resume(dev_priv); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev); - mutex_unlock(&dev->struct_mutex); + i915_gem_resume(dev); i915_restore_state(dev); intel_opregion_setup(dev_priv); @@ -1652,6 +1615,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@ -1778,8 +1742,6 @@ int i915_reset(struct drm_i915_private *dev_priv) unsigned reset_counter; int ret; - intel_reset_gt_powersave(dev_priv); - mutex_lock(&dev->struct_mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ @@ -1835,8 +1797,7 @@ int i915_reset(struct drm_i915_private *dev_priv) * previous concerns that it doesn't respond well to some forms * of re-init after reset. */ - if (INTEL_INFO(dev)->gen > 5) - intel_enable_gt_powersave(dev_priv); + intel_autoenable_gt_powersave(dev_priv); return 0; @@ -2462,7 +2423,6 @@ static int intel_runtime_resume(struct device *device) * we can do is to hope that things will still work (and disable RPM). */ i915_gem_init_swizzling(dev); - gen6_update_ring_freq(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); @@ -2618,6 +2578,7 @@ static struct drm_driver driver = { .postclose = i915_driver_postclose, .set_busid = drm_pci_set_busid, + .gem_close_object = i915_gem_close_object, .gem_free_object = i915_gem_free_object, .gem_vm_ops = &i915_gem_vm_ops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 21f939074abc..c36d17659ebe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -61,6 +61,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" +#include "i915_gem_request.h" #include "intel_gvt.h" @@ -69,7 +70,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20160711" +#define DRIVER_DATE "20160808" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -401,7 +402,7 @@ struct drm_i915_file_private { unsigned boosts; } rps; - unsigned int bsd_ring; + unsigned int bsd_engine; }; /* Used by dp and fdi links */ @@ -431,8 +432,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -#define WATCH_LISTS 0 - struct opregion_header; struct opregion_acpi; struct opregion_swsci; @@ -511,13 +510,13 @@ struct drm_i915_error_state { struct intel_display_error_state *display; struct drm_i915_error_object *semaphore_obj; - struct drm_i915_error_ring { - bool valid; + struct drm_i915_error_engine { + int engine_id; /* Software tracked state */ bool waiting; int num_waiters; int hangcheck_score; - enum intel_ring_hangcheck_action hangcheck_action; + enum intel_engine_hangcheck_action hangcheck_action; int num_requests; /* our own tracking of ring head and tail */ @@ -577,7 +576,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; - } ring[I915_NUM_ENGINES]; + } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { u32 size; @@ -592,7 +591,7 @@ struct drm_i915_error_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 ring:4; + s32 engine:4; u32 cache_level:3; } **active_bo, **pinned_bo; @@ -893,7 +892,7 @@ struct i915_gem_context { struct intel_context { struct drm_i915_gem_object *state; - struct intel_ringbuffer *ringbuf; + struct intel_ring *ring; struct i915_vma *lrc_vma; uint32_t *lrc_reg_state; u64 lrc_desc; @@ -908,6 +907,7 @@ struct i915_gem_context { struct list_head link; u8 remap_slice; + bool closed:1; }; enum fb_op_origin { @@ -1173,6 +1173,7 @@ struct intel_gen6_power_mgmt { u8 max_freq_softlimit; /* Max frequency permitted by the driver */ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ u8 rp1_freq; /* "less than" RP0 power/freqency */ @@ -1190,11 +1191,9 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; - struct delayed_work delayed_resume_work; + struct delayed_work autoenable_work; unsigned boosts; - struct intel_rps_client semaphores, mmioflips; - /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; @@ -1319,7 +1318,6 @@ struct i915_gem_mm { struct notifier_block oom_notifier; struct notifier_block vmap_notifier; struct shrinker shrinker; - bool shrinker_no_lock_stealing; /** LRU list of objects with fence regs on them. */ struct list_head fence_list; @@ -1331,7 +1329,7 @@ struct i915_gem_mm { bool interruptible; /* the indicator for dispatch video commands on two BSD rings */ - unsigned int bsd_ring_dispatch_index; + unsigned int bsd_engine_dispatch_index; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; @@ -1670,7 +1668,7 @@ struct intel_pipe_crc { }; struct i915_frontbuffer_tracking { - struct mutex lock; + spinlock_t lock; /* * Tracking bits for delayed frontbuffer flushing du to gpu activity or @@ -1705,18 +1703,6 @@ struct i915_virtual_gpu { bool active; }; -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - uint32_t dispatch_flags; - uint32_t args_batch_start_offset; - uint64_t batch_obj_vm_offset; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch_obj; - struct i915_gem_context *ctx; - struct drm_i915_gem_request *request; -}; - /* used in computing the new watermarks state */ struct intel_wm_config { unsigned int num_pipes_active; @@ -1769,7 +1755,7 @@ struct drm_i915_private { struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; struct drm_i915_gem_object *semaphore_obj; - uint32_t last_seqno, next_seqno; + u32 next_seqno; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2016,12 +2002,7 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { - int (*execbuf_submit)(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); - int (*init_engines)(struct drm_device *dev); void (*cleanup_engine)(struct intel_engine_cs *engine); - void (*stop_engine)(struct intel_engine_cs *engine); /** * Is the GPU currently considered idle, or busy executing @@ -2144,8 +2125,6 @@ struct drm_i915_gem_object_ops { */ #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8 -#define INTEL_FRONTBUFFER_BITS \ - (INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES) #define INTEL_FRONTBUFFER_PRIMARY(pipe) \ (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) #define INTEL_FRONTBUFFER_CURSOR(pipe) \ @@ -2169,18 +2148,21 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - struct list_head engine_list[I915_NUM_ENGINES]; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; struct list_head batch_pool_link; + unsigned long flags; /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on * inactive (ready to be unbound) list. */ - unsigned int active:I915_NUM_ENGINES; +#define I915_BO_ACTIVE_SHIFT 0 +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) +#define __I915_BO_ACTIVE(bo) \ + ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * This is set if the object has been written to since last bound @@ -2201,10 +2183,6 @@ struct drm_i915_gem_object { unsigned int madv:2; /** - * Current tiling mode for the object. - */ - unsigned int tiling_mode:2; - /** * Whether the tiling parameters for the currently associated fence * register have changed. Note that for the purposes of tracking * tiling changes we also treat the unfenced register, the register @@ -2234,9 +2212,17 @@ struct drm_i915_gem_object { unsigned int cache_level:3; unsigned int cache_dirty:1; - unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; + atomic_t frontbuffer_bits; + + /** Current tiling stride for the object, if it's tiled. */ + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) unsigned int has_wc_mmap; + /** Count of VMA actually bound by this object */ + unsigned int bind_count; unsigned int pin_display; struct sg_table *pages; @@ -2256,14 +2242,10 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. - * */ - struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer. */ - struct drm_i915_gem_request *last_fenced_req; - - /** Current tiling stride for the object, if it's tiled. */ - uint32_t stride; + */ + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2287,7 +2269,56 @@ struct drm_i915_gem_object { } userptr; }; }; -#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + return to_intel_bo(drm_gem_object_lookup(file, handle)); +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); + +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference_unlocked(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) @@ -2295,6 +2326,55 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } +static inline unsigned long +i915_gem_object_get_active(const struct drm_i915_gem_object *obj) +{ + return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_active(obj); +} + +static inline void +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline void +i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline bool +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, + int engine) +{ + return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline unsigned int +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + /* * Optimised SGL iterator for GEM objects */ @@ -2365,171 +2445,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. - * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init - */ -struct drm_i915_gem_request { - struct kref ref; - - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - struct intel_engine_cs *engine; - struct intel_signal_node signaling; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - - /** Position in the ringbuffer of the start of the request */ - u32 head; - - /** - * Position in the ringbuffer of the start of the postfix. - * This is required to calculate the maximum available ringbuffer - * space without overwriting the postfix. - */ - u32 postfix; - - /** Position in the ringbuffer of the end of the whole request */ - u32 tail; - - /** Preallocate space in the ringbuffer for the emitting the request */ - u32 reserved_space; - - /** - * Context and ring buffer related to this request - * Contexts are refcounted, so when this request is associated with a - * context, we must increment the context's refcount, to guarantee that - * it persists while any request is linked to it. Requests themselves - * are also refcounted, so the request will only be freed when the last - * reference to it is dismissed, and the code in - * i915_gem_request_free() will then decrement the refcount on the - * context. - */ - struct i915_gem_context *ctx; - struct intel_ringbuffer *ringbuf; - - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired. - */ - struct i915_gem_context *previous_context; - - /** Batch buffer related to this request if any (used for - error state dump only) */ - struct drm_i915_gem_object *batch_obj; - - /** Time at which this request was emitted, in jiffies. */ - unsigned long emitted_jiffies; - - /** global list entry for this request */ - struct list_head list; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_list; - - /** process identifier submitting this request */ - struct pid *pid; - - /** - * The ELSP only accepts two elements at a time, so we queue - * context/tail pairs on a given queue (ring->execlist_queue) until the - * hardware is available. The queue serves a double purpose: we also use - * it to keep track of the up to 2 contexts currently in the hardware - * (usually one in execution and the other queued up by the GPU): We - * only remove elements from the head of the queue when the hardware - * informs us that an element has been completed. - * - * All accesses to the queue are mediated by a spinlock - * (ring->execlist_lock). - */ - - /** Execlist link in the submission queue.*/ - struct list_head execlist_link; - - /** Execlists no. of times this request has been sent to the ELSP */ - int elsp_submitted; - - /** Execlists context hardware id. */ - unsigned ctx_hw_id; -}; - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); - -static inline uint32_t -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) -{ - return req ? req->seqno : 0; -} - -static inline struct intel_engine_cs * -i915_gem_request_get_engine(struct drm_i915_gem_request *req) -{ - return req ? req->engine : NULL; -} - -static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) -{ - if (req) - kref_get(&req->ref); - return req; -} - -static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) -{ - kref_put(&req->ref, i915_gem_request_free); -} - -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) -{ - if (src) - i915_gem_request_reference(src); - - if (*pdst) - i915_gem_request_unreference(*pdst); - - *pdst = src; -} - -/* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - /* * A command that requires special handling by the command parser. */ @@ -2617,8 +2532,9 @@ struct drm_i915_cmd_descriptor { /* * A table of commands requiring special handling by the command parser. * - * Each ring has an array of tables. Each table consists of an array of command - * descriptors, which must be sorted with command opcodes in ascending order. + * Each engine has an array of tables. Each table consists of an array of + * command descriptors, which must be sorted with command opcodes in + * ascending order. */ struct drm_i915_cmd_table { const struct drm_i915_cmd_descriptor *table; @@ -2932,6 +2848,8 @@ extern int i915_resume_switcheroo(struct drm_device *dev); int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value); + /* i915_drv.c */ void __printf(3, 4) __i915_printk(struct drm_i915_private *dev_priv, const char *level, @@ -3107,11 +3025,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req); -int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, @@ -3150,40 +3063,24 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, size_t size); struct drm_i915_gem_object *i915_gem_object_create_from_data( struct drm_device *dev, const void *data, size_t size); +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); -void i915_gem_vma_destroy(struct i915_vma *vma); - -/* Flags used by pin/bind&friends. */ -#define PIN_MAPPABLE (1<<0) -#define PIN_NONBLOCK (1<<1) -#define PIN_GLOBAL (1<<2) -#define PIN_OFFSET_BIAS (1<<3) -#define PIN_USER (1<<4) -#define PIN_UPDATE (1<<5) -#define PIN_ZONE_4G (1<<6) -#define PIN_HIGH (1<<7) -#define PIN_OFFSET_FIXED (1<<8) -#define PIN_OFFSET_MASK (~4095) -int __must_check -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags); + int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags); + u64 size, + u64 alignment, + u64 flags); int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); -/* - * BEWARE: Do not use the function below unless you can _absolutely_ - * _guarantee_ VMA in question is _not in use_ anywhere. - */ -int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); @@ -3285,10 +3182,10 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req); + struct drm_i915_gem_request *to); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req); + struct drm_i915_gem_request *req, + unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); @@ -3299,44 +3196,12 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits); -/** - * Returns true if seq1 is later than seq2. - */ -static inline bool -i915_seqno_passed(uint32_t seq1, uint32_t seq2) -{ - return (int32_t)(seq1 - seq2) >= 0; -} - -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); -} - -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); -} - -bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us) -{ - return (i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); -} - -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); static inline u32 i915_reset_counter(struct i915_gpu_error *error) { @@ -3381,24 +3246,13 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); -int i915_gem_init_engines(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); +int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + bool interruptible); int __must_check i915_gem_suspend(struct drm_device *dev); -void __i915_add_request(struct drm_i915_gem_request *req, - struct drm_i915_gem_object *batch_obj, - bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, NULL, true) -#define i915_add_request_no_flush(req) \ - __i915_add_request(req, NULL, false) -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps); -int __must_check i915_wait_request(struct drm_i915_gem_request *req); +void i915_gem_resume(struct drm_device *dev); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, @@ -3419,11 +3273,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced); +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode); +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode, bool fenced); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); @@ -3444,7 +3297,6 @@ i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); } -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o); bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, const struct i915_ggtt_view *view); bool i915_gem_obj_bound(struct drm_i915_gem_object *o, @@ -3478,7 +3330,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) return container_of(vm, struct i915_hw_ppgtt, base); } - static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); @@ -3487,18 +3338,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); -static inline int __must_check -i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, - uint32_t alignment, - unsigned flags) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - return i915_gem_object_pin(obj, &ggtt->base, - alignment, flags | PIN_GLOBAL); -} - void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view); static inline void @@ -3528,6 +3367,7 @@ void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_free(struct kref *ctx_ref); struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); @@ -3548,12 +3388,14 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline void i915_gem_context_reference(struct i915_gem_context *ctx) +static inline struct i915_gem_context * +i915_gem_context_get(struct i915_gem_context *ctx) { kref_get(&ctx->ref); + return ctx; } -static inline void i915_gem_context_unreference(struct i915_gem_context *ctx) +static inline void i915_gem_context_put(struct i915_gem_context *ctx) { lockdep_assert_held(&ctx->i915->drm.struct_mutex); kref_put(&ctx->ref, i915_gem_context_free); @@ -3576,13 +3418,10 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); /* i915_gem_evict.c */ -int __must_check i915_gem_evict_something(struct drm_device *dev, - struct i915_address_space *vm, - int min_size, - unsigned alignment, +int __must_check i915_gem_evict_something(struct i915_address_space *vm, + u64 min_size, u64 alignment, unsigned cache_level, - unsigned long start, - unsigned long end, + u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); @@ -3634,16 +3473,9 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec struct drm_i915_private *dev_priv = to_i915(obj->base.dev); return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); } -/* i915_gem_debug.c */ -#if WATCH_LISTS -int i915_verify_lists(struct drm_device *dev); -#else -#define i915_verify_lists(dev) 0 -#endif - /* i915_debugfs.c */ #ifdef CONFIG_DEBUG_FS int i915_debugfs_register(struct drm_i915_private *dev_priv); @@ -3684,15 +3516,15 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine); -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine); -bool i915_needs_cmd_parser(struct intel_engine_cs *engine); -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master); +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master); /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); @@ -3800,7 +3632,6 @@ extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); -extern bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 11681501d7b1..f4f8eaa90f2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,10 +29,13 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "intel_mocs.h" +#include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> @@ -41,10 +44,6 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -139,7 +138,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) if (ret) return ret; - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -156,10 +154,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = 0; mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->base.active_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); @@ -281,23 +279,119 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; -static int -drop_pages(struct drm_i915_gem_object *obj) +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; + struct i915_vma *vma; + LIST_HEAD(still_in_list); int ret; - drm_gem_object_reference(&obj->base); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) + /* The vma will only be freed if it is marked as closed, and if we wait + * upon rendering to the vma, we may unbind anything in the list. + */ + while ((vma = list_first_entry_or_null(&obj->vma_list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) break; - - ret = i915_gem_object_put_pages(obj); - drm_gem_object_unreference(&obj->base); + } + list_splice(&still_in_list, &obj->vma_list); return ret; } +/** + * Ensures that all rendering to the object has completed and the object is + * safe to unbind from the GTT or access from the CPU. + * @obj: i915 gem object + * @readonly: waiting for just read access or read-write access + */ +int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) +{ + struct reservation_object *resv; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + if (!readonly) { + active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait(&active[idx], + &obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + long err; + + err = reservation_object_wait_timeout_rcu(resv, !readonly, true, + MAX_SCHEDULE_TIMEOUT); + if (err < 0) + return err; + } + + return 0; +} + +/* A nonblocking variant of the above wait. Must be called prior to + * acquiring the mutex for the object, as the object state may change + * during this call. A reference must be held by the caller for the object. + */ +static __must_check int +__unsafe_wait_rendering(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) +{ + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + active_mask = __I915_BO_ACTIVE(obj); + if (!active_mask) + return 0; + + if (!readonly) { + active = obj->last_read; + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait_unlocked(&active[idx], + true, NULL, rps); + if (ret) + return ret; + } + + return 0; +} + +static struct intel_rps_client *to_rps_client(struct drm_file *file) +{ + struct drm_i915_file_private *fpriv = file->driver_priv; + + return &fpriv->rps; +} + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -318,7 +412,11 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->base.filp == NULL) return -EINVAL; - ret = drop_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + + ret = i915_gem_object_put_pages(obj); if (ret) return ret; @@ -408,7 +506,7 @@ i915_gem_create(struct drm_file *file, ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; @@ -511,6 +609,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (WARN_ON(!i915_gem_object_has_struct_page(obj))) return -EINVAL; + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -518,9 +620,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; } ret = i915_gem_object_get_pages(obj); @@ -644,7 +743,7 @@ i915_gem_gtt_pread(struct drm_device *dev, uint64_t offset; int ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (ret) { ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); if (ret) @@ -857,36 +956,44 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check source. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; + ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } -out: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); + + return ret; + +err: + i915_gem_object_put_unlocked(obj); return ret; } @@ -916,7 +1023,7 @@ fast_user_write(struct io_mapping *mapping, /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. - * @dev: drm device pointer + * @i915: i915 device private data * @obj: i915 gem object * @args: pwrite arguments structure * @file: drm file pointer @@ -935,10 +1042,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, int ret; bool hit_slow_path = false; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) return -EFAULT; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); if (ret) { ret = insert_mappable_node(i915, &node, PAGE_SIZE); if (ret) @@ -1132,15 +1240,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev, obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { /* If we're not in the cpu write domain, set ourself into the gtt * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. */ @@ -1270,27 +1379,29 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - intel_runtime_pm_get(dev_priv); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto put_rpm; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Bounds check destination. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + if (ret) + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; + ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@ -1306,7 +1417,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. */ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else if (i915_gem_object_has_struct_page(obj)) @@ -1315,494 +1426,19 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = -ENODEV; } -out: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); -put_rpm: intel_runtime_pm_put(dev_priv); return ret; -} - -static int -i915_gem_check_wedge(unsigned reset_counter, bool interruptible) -{ - if (__i915_terminally_wedged(reset_counter)) - return -EIO; - - if (__i915_reset_in_progress(reset_counter)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -static unsigned long local_clock_us(unsigned *cpu) -{ - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; -} - -static bool busywait_stop(unsigned long timeout, unsigned cpu) -{ - unsigned this_cpu; - - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; - - return this_cpu != cpu; -} - -bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) -{ - unsigned cpu; - - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. - * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. - */ - - timeout_us += local_clock_us(&cpu); - do { - if (i915_gem_request_completed(req)) - return true; - - if (signal_pending_state(state, current)) - break; - - if (busywait_stop(timeout_us, cpu)) - break; - - cpu_relax_lowlatency(); - } while (!need_resched()); - - return false; -} - -/** - * __i915_wait_request - wait until execution of request has finished - * @req: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: RPS client - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the request was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. - */ -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) -{ - int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); - struct intel_wait wait; - unsigned long timeout_remain; - s64 before = 0; /* Only to silence a compiler warning. */ - int ret = 0; - - might_sleep(); - - if (list_empty(&req->list)) - return 0; - - if (i915_gem_request_completed(req)) - return 0; - - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; - - if (*timeout == 0) - return -ETIME; - - timeout_remain = nsecs_to_jiffies_timeout(*timeout); - - /* - * Record current time in case interrupted by signal, or wedged. - */ - before = ktime_get_raw_ns(); - } - - trace_i915_gem_request_wait_begin(req); - - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. - */ - if (INTEL_INFO(req->i915)->gen >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - - /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (i915_spin_request(req, state, 5)) - goto complete; - - set_current_state(state); - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->seqno); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; - - for (;;) { - if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; - break; - } - - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; - break; - } - - if (intel_wait_complete(&wait)) - break; - - set_current_state(state); - -wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) - break; - } - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); -complete: - trace_i915_gem_request_wait_end(req); - - if (timeout) { - s64 tres = *timeout - (ktime_get_raw_ns() - before); - - *timeout = tres < 0 ? 0 : tres; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regrssion from the timespec->ktime conversion. - */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (rps && req->seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; -} - -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - req->pid = get_pid(task_pid(current)); - - return 0; -} - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - - i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); -} - -static void -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (list_empty(&req->list)) - return; - - do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); - - i915_gem_request_retire(tmp); - } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); -} - -/** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event. - * @req: request to wait on - */ -int -i915_wait_request(struct drm_i915_gem_request *req) -{ - struct drm_i915_private *dev_priv = req->i915; - bool interruptible; - int ret; - - interruptible = dev_priv->mm.interruptible; - - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); - - ret = __i915_wait_request(req, interruptible, NULL, NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. */ - if (!i915_reset_in_progress(&dev_priv->gpu_error)) - __i915_gem_request_retire__upto(req); - - return 0; -} - -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - * @obj: i915 gem object - * @readonly: waiting for read access or write - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) -{ - int ret, i; - - if (!obj->active) - return 0; - - if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); - if (ret) - return ret; - - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) - i915_gem_object_retire__read(obj, i); - else - i915_gem_object_retire__write(obj); - } - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - - ret = i915_wait_request(obj->last_read_req[i]); - if (ret) - return ret; - - i915_gem_object_retire__read(obj, i); - } - GEM_BUG_ON(obj->active); - } - - return 0; -} - -static void -i915_gem_object_retire_request(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req) -{ - int ring = req->engine->id; - - if (obj->last_read_req[ring] == req) - i915_gem_object_retire__read(obj, ring); - else if (obj->last_write_req == req) - i915_gem_object_retire__write(obj); - - if (!i915_reset_in_progress(&req->i915->gpu_error)) - __i915_gem_request_retire__upto(req); -} - -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. - */ -static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int ret, i, n = 0; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!dev_priv->mm.interruptible); - - if (!obj->active) - return 0; - - if (readonly) { - struct drm_i915_gem_request *req; - - req = obj->last_write_req; - if (req == NULL) - return 0; - - requests[n++] = i915_gem_request_reference(req); - } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_reference(req); - } - } - - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = __i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - i915_gem_object_retire_request(obj, requests[i]); - i915_gem_request_unreference(requests[i]); - } +err_rpm: + intel_runtime_pm_put(dev_priv); +err: + i915_gem_object_put_unlocked(obj); return ret; } -static struct intel_rps_client *to_rps_client(struct drm_file *file) -{ - struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; -} - static enum fb_op_origin write_origin(struct drm_i915_gem_object *obj, unsigned domain) { @@ -1828,10 +1464,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, int ret; /* Only handle setting domains to types used by the CPU. */ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) + if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; /* Having something in the write domain implies it's in the read @@ -1840,25 +1473,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0 && read_domains != write_domain) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Try to flush the object off the GPU without holding the lock. * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, - to_rps_client(file), - !write_domain); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); if (ret) - goto unref; + goto err; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; if (read_domains & I915_GEM_DOMAIN_GTT) ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -1868,11 +1497,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0) intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); -unref: - drm_gem_object_unreference(&obj->base); -unlock: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return ret; + +err: + i915_gem_object_put_unlocked(obj); + return ret; } /** @@ -1887,26 +1518,23 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int ret = 0; + int err = 0; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_display) - i915_gem_object_flush_cpu_write_domain(obj); + if (READ_ONCE(obj->pin_display)) { + err = i915_mutex_lock_interruptible(dev); + if (!err) { + i915_gem_object_flush_cpu_write_domain(obj); + mutex_unlock(&dev->struct_mutex); + } + } - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return err; } /** @@ -1934,7 +1562,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_mmap *args = data; - struct drm_gem_object *obj; + struct drm_i915_gem_object *obj; unsigned long addr; if (args->flags & ~(I915_MMAP_WC)) @@ -1943,19 +1571,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) return -ENODEV; - obj = drm_gem_object_lookup(file, args->handle); - if (obj == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; /* prime objects have no backing filp to GEM mmap * pages from. */ - if (!obj->filp) { - drm_gem_object_unreference_unlocked(obj); + if (!obj->base.filp) { + i915_gem_object_put_unlocked(obj); return -EINVAL; } - addr = vm_mmap(obj->filp, 0, args->size, + addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); if (args->flags & I915_MMAP_WC) { @@ -1963,7 +1591,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); return -EINTR; } vma = find_vma(mm, addr); @@ -1975,9 +1603,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); + WRITE_ONCE(obj->has_wc_mmap, true); } - drm_gem_object_unreference_unlocked(obj); + i915_gem_object_put_unlocked(obj); if (IS_ERR((void *)addr)) return addr; @@ -2009,41 +1637,41 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt_view view = i915_ggtt_view_normal; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); pgoff_t page_offset; unsigned long pfn; - int ret = 0; - bool write = !!(vmf->flags & FAULT_FLAG_WRITE); - - intel_runtime_pm_get(dev_priv); + int ret; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto out; - trace_i915_gem_object_fault(obj, page_offset, true, write); /* Try to flush the object off the GPU first without holding the lock. - * Upon reacquiring the lock, we will perform our sanity checks and then + * Upon acquiring the lock, we will perform our sanity checks and then * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); + ret = __unsafe_wait_rendering(obj, NULL, !write); if (ret) - goto unlock; + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { ret = -EFAULT; - goto unlock; + goto err_unlock; } /* Use a partial view if the object is bigger than the aperture. */ if (obj->base.size >= ggtt->mappable_end && - obj->tiling_mode == I915_TILING_NONE) { + !i915_gem_object_is_tiled(obj)) { static const unsigned int chunk_size = 256; // 1 MiB memset(&view, 0, sizeof(view)); @@ -2057,17 +1685,17 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) - goto unlock; + goto err_unlock; ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) - goto unpin; + goto err_unpin; ret = i915_gem_object_get_fence(obj); if (ret) - goto unpin; + goto err_unpin; /* Finally, remap it using the new GTT offset */ pfn = ggtt->mappable_base + @@ -2112,11 +1740,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) (unsigned long)vmf->virtual_address, pfn + page_offset); } -unpin: +err_unpin: i915_gem_object_ggtt_unpin_view(obj, &view); -unlock: +err_unlock: mutex_unlock(&dev->struct_mutex); -out: +err_rpm: + intel_runtime_pm_put(dev_priv); +err: switch (ret) { case -EIO: /* @@ -2157,8 +1787,6 @@ out: ret = VM_FAULT_SIGBUS; break; } - - intel_runtime_pm_put(dev_priv); return ret; } @@ -2212,46 +1840,58 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) i915_gem_release_mmap(obj); } -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) +/** + * i915_gem_get_ggtt_size - return required global GTT size for an object + * @dev_priv: i915 device + * @size: object size + * @tiling_mode: tiling mode + * + * Return the required global GTT size for an object, taking into account + * potential fence register mapping. + */ +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u64 size, int tiling_mode) { - uint32_t gtt_size; + u64 ggtt_size; - if (INTEL_INFO(dev)->gen >= 4 || + GEM_BUG_ON(size == 0); + + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN3(dev)) - gtt_size = 1024*1024; + if (IS_GEN3(dev_priv)) + ggtt_size = 1024*1024; else - gtt_size = 512*1024; + ggtt_size = 512*1024; - while (gtt_size < size) - gtt_size <<= 1; + while (ggtt_size < size) + ggtt_size <<= 1; - return gtt_size; + return ggtt_size; } /** - * i915_gem_get_gtt_alignment - return required GTT alignment for an object - * @dev: drm device + * i915_gem_get_ggtt_alignment - return required global GTT alignment + * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode - * @fenced: is fenced alignemned required or not + * @fenced: is fenced alignment required or not * - * Return the required GTT alignment for an object, taking into account + * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced) +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode, bool fenced) { + GEM_BUG_ON(size == 0); + /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || + if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) || tiling_mode == I915_TILING_NONE) return 4096; @@ -2259,42 +1899,34 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int ret; - - dev_priv->mm.shrinker_no_lock_stealing = true; + int err; - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = drm_gem_create_mmap_offset(&obj->base); + if (!err) + return 0; - /* Badly fragmented mmap space? The only way we can recover - * space is by destroying unwanted objects. We can't randomly release - * mmap_offsets as userspace expects them to be persistent for the - * lifetime of the objects. The closest we can is to release the - * offsets on purgeable objects by truncating it and marking it purged, - * which prevents userspace from ever using that object again. + /* We can idle the GPU locklessly to flush stale objects, but in order + * to claim that space for ourselves, we need to take the big + * struct_mutex to free the requests+objects and allocate our slot. */ - i915_gem_shrink(dev_priv, - obj->base.size >> PAGE_SHIFT, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = i915_gem_wait_for_idle(dev_priv, true); + if (err) + return err; - i915_gem_shrink_all(dev_priv); - ret = drm_gem_create_mmap_offset(&obj->base); -out: - dev_priv->mm.shrinker_no_lock_stealing = false; + err = i915_mutex_lock_interruptible(&dev_priv->drm); + if (!err) { + i915_gem_retire_requests(dev_priv); + err = drm_gem_create_mmap_offset(&obj->base); + mutex_unlock(&dev_priv->drm.struct_mutex); + } - return ret; + return err; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) @@ -2311,32 +1943,15 @@ i915_gem_mmap_gtt(struct drm_file *file, struct drm_i915_gem_object *obj; int ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); - ret = -EFAULT; - goto out; - } + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; ret = i915_gem_object_create_mmap_offset(obj); - if (ret) - goto out; - - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); + if (ret == 0) + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); -out: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } @@ -2454,7 +2069,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) if (obj->pages_pin_count) return -EBUSY; - BUG_ON(i915_gem_obj_bound_any(obj)); + GEM_BUG_ON(obj->bind_count); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -2574,7 +2189,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj); - if (obj->tiling_mode != I915_TILING_NONE && + if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) i915_gem_object_pin_pages(obj); @@ -2698,253 +2313,39 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) return obj->mapping; } -void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine; - - engine = i915_gem_request_get_engine(req); - - /* Add a reference if we're newly entering the active list. */ - if (obj->active == 0) - drm_gem_object_reference(&obj->base); - obj->active |= intel_engine_flag(engine); - - list_move_tail(&obj->engine_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); - - list_move_tail(&vma->vm_link, &vma->vm->active_list); -} - static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj) +i915_gem_object_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_write); - i915_gem_request_assign(&obj->last_write_req, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) +i915_gem_object_retire__read(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - struct i915_vma *vma; - - GEM_BUG_ON(obj->last_read_req[ring] == NULL); - GEM_BUG_ON(!(obj->active & (1 << ring))); + int idx = request->engine->id; + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_read[idx]); - list_del_init(&obj->engine_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); + GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - if (obj->last_write_req && obj->last_write_req->engine->id == ring) - i915_gem_object_retire__write(obj); - - obj->active &= ~(1 << ring); - if (obj->active) + i915_gem_object_clear_active(obj, idx); + if (i915_gem_object_is_active(obj)) return; /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ - list_move_tail(&obj->global_list, - &to_i915(obj->base.dev)->mm.bound_list); - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!list_empty(&vma->vm_link)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - } - - i915_gem_request_assign(&obj->last_fenced_req, NULL); - drm_gem_object_unreference(&obj->base); -} - -static int -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) -{ - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; -} - -int -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) -{ - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 1; - } - - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; -} - -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) - return; - - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; - - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -/* - * NB: This function is not allowed to fail. Doing so would mean the the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). - */ -void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) -{ - struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; - u32 request_start; - u32 reserved_tail; - int ret; - - if (WARN_ON(request == NULL)) - return; - - engine = request->engine; - ringbuf = request->ringbuf; - - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. Let the ring - * know that it is time to use that space up. - */ - request_start = intel_ring_get_tail(ringbuf); - reserved_tail = request->reserved_space; - request->reserved_space = 0; + if (obj->bind_count) + list_move_tail(&obj->global_list, + &request->i915->mm.bound_list); - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_ring_flush_all_caches(request); - /* Not allowed to fail! */ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); - } - - trace_i915_gem_request_add(request); - - request->head = request_start; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; - - /* Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); - list_add_tail(&request->list, &engine->request_list); - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - request->postfix = intel_ring_get_tail(ringbuf); - - if (i915.enable_execlists) - ret = engine->emit_request(request); - else { - ret = engine->add_request(request); - - request->tail = intel_ring_get_tail(ringbuf); - } - /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); - /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; - if (ret < 0) - ret += ringbuf->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); - - i915_gem_mark_busy(engine); + i915_gem_object_put(obj); } static bool i915_context_is_banned(const struct i915_gem_context *ctx) @@ -2978,101 +2379,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, } } -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} - -static inline int -__i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) -{ - struct drm_i915_private *dev_priv = engine->i915; - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); - struct drm_i915_gem_request *req; - int ret; - - if (!req_out) - return -EINVAL; - - *req_out = NULL; - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. - */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); - if (ret) - return ret; - - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (req == NULL) - return -ENOMEM; - - ret = i915_gem_get_seqno(engine->i915, &req->seqno); - if (ret) - goto err; - - kref_init(&req->ref); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(req->ctx); - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); - if (ret) - goto err_ctx; - - *req_out = req; - return 0; - -err_ctx: - i915_gem_context_unreference(ctx); -err: - kmem_cache_free(dev_priv->requests, req); - return ret; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_gem_request *req; - int err; - - if (ctx == NULL) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; -} - struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -3086,7 +2392,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { if (i915_gem_request_completed(request)) continue; @@ -3108,23 +2414,24 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; i915_set_reset_status(request->ctx, ring_hung); - list_for_each_entry_continue(request, &engine->request_list, list) + list_for_each_entry_continue(request, &engine->request_list, link) i915_set_reset_status(request->ctx, false); } static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { - struct intel_ringbuffer *buffer; - - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_request *request; + struct intel_ring *ring; - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); + request = i915_gem_active_peek(&engine->last_request, + &engine->i915->drm.struct_mutex); - i915_gem_object_retire__read(obj, engine->id); - } + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + if (request) + intel_engine_init_seqno(engine, request->fence.seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -3146,15 +2453,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); - - i915_gem_request_retire(request); - } + if (request) + i915_gem_request_retire_upto(request); + GEM_BUG_ON(intel_engine_is_active(engine)); /* Having flushed all requests from all queues, we know that all * ringbuffers must now be empty. However, since we do not reclaim @@ -3163,12 +2464,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * upon reset is less than when we start. Do one more pass over * all the ringbuffers to reset last_retired_head. */ - list_for_each_entry(buffer, &engine->buffers, link) { - buffer->last_retired_head = buffer->tail; - intel_ring_update_space(buffer); + list_for_each_entry(ring, &engine->buffers, link) { + ring->last_retired_head = ring->tail; + intel_ring_update_space(ring); } - intel_ring_init_seqno(engine, engine->last_submitted_seqno); + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) @@ -3186,82 +2487,11 @@ void i915_gem_reset(struct drm_device *dev) for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); i915_gem_restore_fences(dev); - - WARN_ON(i915_verify_lists(dev)); -} - -/** - * This function clears the request list as sequence numbers are passed. - * @engine: engine to retire requests on - */ -void -i915_gem_retire_requests_ring(struct intel_engine_cs *engine) -{ - WARN_ON(i915_verify_lists(engine->dev)); - - /* Retire requests first as we use it above for the early return. - * If we retire requests last, we may use a later seqno and so clear - * the requests lists without clearing the active list, leading to - * confusion. - */ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); - - if (!i915_gem_request_completed(request)) - break; - - i915_gem_request_retire(request); - } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. - */ - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - if (!list_empty(&obj->last_read_req[engine->id]->list)) - break; - - i915_gem_object_retire__read(obj, engine->id); - } - - WARN_ON(i915_verify_lists(engine->dev)); -} - -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (dev_priv->gt.active_engines == 0) - return; - - GEM_BUG_ON(!dev_priv->gt.awake); - - for_each_engine(engine, dev_priv) { - i915_gem_retire_requests_ring(engine); - if (list_empty(&engine->request_list)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); - } - - if (dev_priv->gt.active_engines == 0) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); } static void @@ -3281,10 +2511,12 @@ i915_gem_retire_work_handler(struct work_struct *work) * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. */ - if (READ_ONCE(dev_priv->gt.awake)) + if (READ_ONCE(dev_priv->gt.awake)) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); + } } static void @@ -3324,11 +2556,14 @@ i915_gem_idle_work_handler(struct work_struct *work) dev_priv->gt.awake = false; rearm_hangcheck = false; + /* As we have disabled hangcheck, we need to unstick any waiters still + * hanging around. However, as we may be racing against the interrupt + * handler or the waiters themselves, we skip enabling the fake-irq. + */ stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) { - DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); - dev_priv->gpu_error.missed_irq_rings |= stuck_engines; - } + if (unlikely(stuck_engines)) + DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", + stuck_engines); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); @@ -3343,32 +2578,17 @@ out_rearm: } } -/** - * Ensures that an object will eventually get non-busy by flushing any required - * write domains, emitting any outstanding lazy request and retiring and - * completed requests. - * @obj: object to flush - */ -static int -i915_gem_object_flush_active(struct drm_i915_gem_object *obj) +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { - int i; - - if (!obj->active) - return 0; - - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - if (i915_gem_request_completed(req)) - i915_gem_object_retire__read(obj, i); - } + struct drm_i915_gem_object *obj = to_intel_bo(gem); + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_vma *vma, *vn; - return 0; + mutex_lock(&obj->base.dev->struct_mutex); + list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) + if (vma->vm->file == fpriv) + i915_vma_close(vma); + mutex_unlock(&obj->base.dev->struct_mutex); } /** @@ -3399,122 +2619,58 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; + struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int i, n = 0; - int ret; + unsigned long active; + int idx, ret = 0; if (args->flags != 0) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); - if (&obj->base == NULL) { - mutex_unlock(&dev->struct_mutex); + obj = i915_gem_object_lookup(file, args->bo_handle); + if (!obj) return -ENOENT; - } - - /* Need to make sure the object gets inactive eventually. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto out; - - if (!obj->active) - goto out; - - /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a timeout == 0 (like busy ioctl) - */ - if (args->timeout_ns == 0) { - ret = -ETIME; - goto out; - } - - drm_gem_object_unreference(&obj->base); - - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) - continue; - req[n++] = i915_gem_request_reference(obj->last_read_req[i]); + active = __I915_BO_ACTIVE(obj); + for_each_active(active, idx) { + s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL; + ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true, + timeout, rps); + if (ret) + break; } - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - ret = __i915_wait_request(req[i], true, - args->timeout_ns > 0 ? &args->timeout_ns : NULL, - to_rps_client(file)); - i915_gem_request_unreference(req[i]); - } - return ret; - -out: - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } static int -__i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) +__i915_gem_object_sync(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from) { - struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_engine(from_req); - if (to == from) - return 0; - - if (i915_gem_request_completed(from_req)) + if (to->engine == from->engine) return 0; - if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(from_req, - i915->mm.interruptible, - NULL, - &i915->rps.semaphores); + if (!i915.semaphores) { + ret = i915_wait_request(from, + from->i915->mm.interruptible, + NULL, + NO_WAITBOOST); if (ret) return ret; - - i915_gem_object_retire_request(obj, from_req); } else { - int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(from_req); - - WARN_ON(!to_req); - - if (seqno <= from->semaphore.sync_seqno[idx]) + int idx = intel_engine_sync_index(from->engine, to->engine); + if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; - if (*to_req == NULL) { - struct drm_i915_gem_request *req; - - req = i915_gem_request_alloc(to, NULL); - if (IS_ERR(req)) - return PTR_ERR(req); - - *to_req = req; - } - - trace_i915_gem_ring_sync_to(*to_req, from, from_req); - ret = to->semaphore.sync_to(*to_req, from, seqno); + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; - /* We use last_read_req because sync_to() - * might have just caused seqno wrap under - * the radar. - */ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->id]); + from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; } return 0; @@ -3524,17 +2680,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * i915_gem_object_sync - sync an object to a ring. * * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. - * @to_req: request we wish to use the object for. See below. - * This will be allocated and returned if a request is - * required but not passed in. + * @to: request we are wishing to use * * This code is meant to abstract object synchronization with the GPU. - * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow one engine to write - * into a buffer at any time, but multiple readers. To ensure each has - * a coherent view of memory, we must: + * Conceptually we serialise writes between engines inside the GPU. + * We only allow one engine to write into a buffer at any time, but + * multiple readers. To ensure each has a coherent view of memory, we must: * * - If there is an outstanding write request to the object, the new * request must wait for it to complete (either CPU or in hw, requests @@ -3543,44 +2694,39 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * - * For CPU synchronisation (NULL to) no request is required. For syncing with - * rings to_req must be non-NULL. However, a request does not have to be - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a - * request will be allocated automatically and returned through *to_req. Note - * that it is not guaranteed that commands will be emitted (because the system - * might already be idle). Hence there is no need to create a request that - * might never have any work submitted. Note further that if a request is - * returned in *to_req, it is the responsibility of the caller to submit - * that request (after potentially adding more work to it). - * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request *to) { - const bool readonly = obj->base.pending_write_domain == 0; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; - int ret, i, n; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; - if (!obj->active) - return 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); - if (to == NULL) - return i915_gem_object_wait_rendering(obj, readonly); + active_mask = i915_gem_object_get_active(obj); + if (!active_mask) + return 0; - n = 0; - if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; + if (obj->base.pending_write_domain) { + active = obj->last_read; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; + active_mask = 1; + active = &obj->last_write; } - for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i], to_req); + + for_each_active(active_mask, idx) { + struct drm_i915_gem_request *request; + int ret; + + request = i915_gem_active_peek(&active[idx], + &obj->base.dev->struct_mutex); + if (!request) + continue; + + ret = __i915_gem_object_sync(to, request); if (ret) return ret; } @@ -3611,7 +2757,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) static void __i915_vma_iounmap(struct i915_vma *vma) { - GEM_BUG_ON(vma->pin_count); + GEM_BUG_ON(i915_vma_is_pinned(vma)); if (vma->iomap == NULL) return; @@ -3620,32 +2766,51 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } -static int __i915_vma_unbind(struct i915_vma *vma, bool wait) +int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + unsigned long active; int ret; - if (list_empty(&vma->obj_link)) - return 0; + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. + */ + __i915_vma_pin(vma); - if (!drm_mm_node_allocated(&vma->node)) { - i915_gem_vma_destroy(vma); - return 0; + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } + + __i915_vma_unpin(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); } - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return -EBUSY; - BUG_ON(obj->pages == NULL); + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; - if (wait) { - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!obj->pages); - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { + if (i915_vma_is_ggtt(vma) && + vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); /* release the fence reg _after_ flushing */ @@ -3656,13 +2821,16 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) __i915_vma_iounmap(vma); } - trace_i915_vma_unbind(vma); + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - vma->vm->unbind_vma(vma); - vma->bound = 0; + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - list_del_init(&vma->vm_link); - if (vma->is_ggtt) { + if (i915_vma_is_ggtt(vma)) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; } else if (vma->ggtt_view.pages) { @@ -3672,13 +2840,11 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) vma->ggtt_view.pages = NULL; } - drm_mm_remove_node(&vma->node); - i915_gem_vma_destroy(vma); - /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ - if (list_empty(&obj->vma_list)) - list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); + if (--obj->bind_count == 0) + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.unbound_list); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -3686,36 +2852,28 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) */ i915_gem_object_unpin_pages(obj); - return 0; -} - -int i915_vma_unbind(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, true); -} +destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); -int __i915_vma_unbind_no_wait(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, false); + return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + bool interruptible) { struct intel_engine_cs *engine; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv) { if (engine->last_context == NULL) continue; - ret = intel_engine_idle(engine); + ret = intel_engine_idle(engine, interruptible); if (ret) return ret; } - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -3753,128 +2911,95 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, } /** - * Finds free space in the GTT aperture and binds the object or a view of it - * there. - * @obj: object to bind - * @vm: address space to bind into - * @ggtt_view: global gtt view if applicable - * @alignment: requested alignment + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma + * @size: requested size in bytes (can be larger than the VMA) + * @alignment: required alignment * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. Failiing that, if the flags permit, it will evict an old VMA, + * preferrably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. */ -static struct i915_vma * -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - unsigned alignment, - uint64_t flags) +static int +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 fence_alignment, unfenced_alignment; - u32 search_flag, alloc_flag; + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; u64 start, end; - u64 size, fence_size; - struct i915_vma *vma; + u64 min_alignment; int ret; - if (i915_is_ggtt(vm)) { - u32 view_size; - - if (WARN_ON(!ggtt_view)) - return ERR_PTR(-EINVAL); - - view_size = i915_ggtt_view_size(obj, ggtt_view); - - fence_size = i915_gem_get_gtt_size(dev, - view_size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - true); - unfenced_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : view_size; - } else { - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); + + min_alignment = + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE); + if (alignment == 0) + alignment = min_alignment; + if (alignment & (min_alignment - 1)) { + DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", + alignment, min_alignment); + return -EINVAL; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - end = vm->total; + + end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, ggtt->mappable_end); + end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", - ggtt_view ? ggtt_view->type : 0, - size, + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); - return ERR_PTR(-E2BIG); + return -E2BIG; } ret = i915_gem_object_get_pages(obj); if (ret) - return ERR_PTR(ret); + return ret; i915_gem_object_pin_pages(obj); - vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : - i915_gem_obj_lookup_or_create_vma(obj, vm); - - if (IS_ERR(vma)) - goto err_unpin; - if (flags & PIN_OFFSET_FIXED) { - uint64_t offset = flags & PIN_OFFSET_MASK; - - if (offset & (alignment - 1) || offset + size > end) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { ret = -EINVAL; - goto err_free_vma; + goto err_unpin; } + vma->node.start = offset; vma->node.size = size; vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); if (ret) { ret = i915_gem_evict_for_vma(vma); if (ret == 0) - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; } - if (ret) - goto err_free_vma; } else { + u32 search_flag, alloc_flag; + if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; alloc_flag = DRM_MM_CREATE_TOP; @@ -3883,47 +3008,45 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, alloc_flag = DRM_MM_CREATE_DEFAULT; } + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, size, alignment, obj->cache_level, start, end, search_flag, alloc_flag); if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, + ret = i915_gem_evict_something(vma->vm, size, alignment, obj->cache_level, start, end, flags); if (ret == 0) goto search_free; - goto err_free_vma; + goto err_unpin; } } - if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { - ret = -EINVAL; - goto err_remove_node; - } - - trace_i915_vma_bind(vma, flags); - ret = i915_vma_bind(vma, obj->cache_level, flags); - if (ret) - goto err_remove_node; + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; - return vma; + return 0; -err_remove_node: - drm_mm_remove_node(&vma->node); -err_free_vma: - i915_gem_vma_destroy(vma); - vma = ERR_PTR(ret); err_unpin: i915_gem_object_unpin_pages(obj); - return vma; + return ret; } bool @@ -4026,20 +3149,17 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t old_write_domain, old_read_domains; struct i915_vma *vma; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) + return 0; + /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. @@ -4081,9 +3201,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) /* And bump the LRU for this access */ vma = i915_gem_obj_to_ggtt(obj); - if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) - list_move_tail(&vma->vm_link, - &ggtt->base.inactive_list); + if (vma && + drm_mm_node_allocated(&vma->node) && + !i915_vma_is_active(vma)) + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); return 0; } @@ -4106,9 +3227,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; - bool bound = false; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -4119,21 +3238,28 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; - if (vma->pin_count) { + if (i915_vma_is_pinned(vma)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } else - bound = true; + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the + * obj->vma_list (due to side-effects from retiring + * an active vma), play safe and restart the iterator. + */ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the @@ -4143,7 +3269,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * rewrite the PTE in the belief that doing so tramples upon less * state and so involves less work. */ - if (bound) { + if (obj->bind_count) { /* Before we change the PTE, the GPU must not be accessing it. * If we wait upon the object, we know that all the bound * VMA are no longer active. @@ -4152,7 +3278,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { + if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { /* Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force @@ -4215,8 +3341,8 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; switch (obj->cache_level) { @@ -4234,7 +3360,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, break; } - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return 0; } @@ -4276,15 +3402,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto rpm_put; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } ret = i915_gem_object_set_cache_level(obj, level); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); rpm_put: @@ -4329,7 +3455,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, view->type == I915_GGTT_VIEW_NORMAL ? PIN_MAPPABLE : 0); if (ret) @@ -4383,13 +3509,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return 0; + i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -4464,25 +3590,30 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_reference(target); + i915_gem_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference(target); + ret = i915_wait_request(target, true, NULL, NULL); + i915_gem_request_put(target); return ret; } static bool -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; - if (alignment && - vma->node.start & (alignment - 1)) + if (!drm_mm_node_allocated(&vma->node)) + return false; + + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) return true; if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) @@ -4502,135 +3633,159 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; u32 fence_size, fence_alignment; - fence_size = i915_gem_get_gtt_size(obj->base.dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, - obj->base.size, - obj->tiling_mode, - true); + fence_size = i915_gem_get_ggtt_size(dev_priv, + obj->base.size, + i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + obj->base.size, + i915_gem_object_get_tiling(obj), + true); fenceable = (vma->node.size == fence_size && (vma->node.start & (fence_alignment - 1)) == 0); mappable = (vma->node.start + fence_size <= - to_i915(obj->base.dev)->ggtt.mappable_end); + dev_priv->ggtt.mappable_end); obj->map_and_fenceable = mappable && fenceable; } -static int -i915_gem_object_do_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - uint32_t alignment, - uint64_t flags) +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; - unsigned bound; + unsigned int bound = vma->flags; int ret; - if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) - return -ENODEV; + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) - return -EINVAL; - - if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) - return -EINVAL; - - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) - return -EINVAL; - - vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : - i915_gem_obj_to_vma(obj, vm); - - if (vma) { - if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; - - if (i915_vma_misplaced(vma, alignment, flags)) { - WARN(vma->pin_count, - "bo is already pinned in %s with incorrect alignment:" - " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - ggtt_view ? "ggtt" : "ppgtt", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), - alignment, - !!(flags & PIN_MAPPABLE), - obj->map_and_fenceable); - ret = i915_vma_unbind(vma); - if (ret) - return ret; - - vma = NULL; - } + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; } - bound = vma ? vma->bound : 0; - if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - } else { - ret = i915_vma_bind(vma, obj->cache_level, flags); + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - return ret; + goto err; } - if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && - (bound ^ vma->bound) & GLOBAL_BIND) { + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; + + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); - WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); - } - vma->pin_count++; + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -} -int -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags) -{ - return i915_gem_object_do_pin(obj, vm, - i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, - alignment, flags); +err: + __i915_vma_unpin(vma); + return ret; } int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags) + u64 size, + u64 alignment, + u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma; + int ret; + + if (!view) + view = &i915_ggtt_view_normal; - BUG_ON(!view); + vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK && + (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) + return -ENOSPC; + + WARN(i915_vma_is_pinned(vma), + "bo is already pinned in ggtt with incorrect alignment:" + " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," + " obj->map_and_fenceable=%d\n", + upper_32_bits(vma->node.start), + lower_32_bits(vma->node.start), + alignment, + !!(flags & PIN_MAPPABLE), + obj->map_and_fenceable); + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } - return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); +} - WARN_ON(vma->pin_count == 0); - WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); +static __always_inline unsigned __busy_read_flag(unsigned int id) +{ + /* Note that we could alias engines in the execbuf API, but + * that would be very unwise as it prevents userspace from + * fine control over engine selection. Ahem. + * + * This should be something like EXEC_MAX_ENGINE instead of + * I915_NUM_ENGINES. + */ + BUILD_BUG_ON(I915_NUM_ENGINES > 16); + return 0x10000 << id; +} - --vma->pin_count; +static __always_inline unsigned int __busy_write_id(unsigned int id) +{ + return id; +} + +static __always_inline unsigned +__busy_set_if_active(const struct i915_gem_active *active, + unsigned int (*flag)(unsigned int id)) +{ + /* For more discussion about the barriers and locking concerns, + * see __i915_gem_active_get_rcu(). + */ + do { + struct drm_i915_gem_request *request; + unsigned int id; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; + + id = request->engine->exec_id; + + /* Check that the pointer wasn't reassigned and overwritten. */ + if (request == rcu_access_pointer(active->request)) + return flag(id); + } while (1); +} + +static inline unsigned +busy_check_reader(const struct i915_gem_active *active) +{ + return __busy_set_if_active(active, __busy_read_flag); +} + +static inline unsigned +busy_check_writer(const struct i915_gem_active *active) +{ + return __busy_set_if_active(active, __busy_write_id); } int @@ -4639,47 +3794,61 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - int ret; + unsigned long active; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { - ret = -ENOENT; - goto unlock; - } + args->busy = 0; + active = __I915_BO_ACTIVE(obj); + if (active) { + int idx; - /* Count all active objects as busy, even if they are currently not used - * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions, therefore emit any - * necessary flushes here. - */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto unref; + /* Yes, the lookups are intentionally racy. + * + * First, we cannot simply rely on __I915_BO_ACTIVE. We have + * to regard the value as stale and as our ABI guarantees + * forward progress, we confirm the status of each active + * request with the hardware. + * + * Even though we guard the pointer lookup by RCU, that only + * guarantees that the pointer and its contents remain + * dereferencable and does *not* mean that the request we + * have is the same as the one being tracked by the object. + * + * Consider that we lookup the request just as it is being + * retired and freed. We take a local copy of the pointer, + * but before we add its engine into the busy set, the other + * thread reallocates it and assigns it to a task on another + * engine with a fresh and incomplete seqno. + * + * So after we lookup the engine's id, we double check that + * the active request is the same and only then do we add it + * into the busy set. + */ + rcu_read_lock(); - args->busy = 0; - if (obj->active) { - int i; + for_each_active(active, idx) + args->busy |= busy_check_reader(&obj->last_read[idx]); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; + /* For ABI sanity, we only care that the write engine is in + * the set of read engines. This is ensured by the ordering + * of setting last_read/last_write in i915_vma_move_to_active, + * and then in reverse in retire. + * + * We don't care that the set of active read/write engines + * may change during construction of the result, as it is + * equally liable to change before userspace can inspect + * the result. + */ + args->busy |= busy_check_writer(&obj->last_write); - req = obj->last_read_req[i]; - if (req) - args->busy |= 1 << (16 + req->engine->exec_id); - } - if (obj->last_write_req) - args->busy |= obj->last_write_req->engine->exec_id; + rcu_read_unlock(); } -unref: - drm_gem_object_unreference(&obj->base); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return 0; } int @@ -4710,19 +3879,14 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file_priv, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } - if (i915_gem_obj_is_pinned(obj)) { - ret = -EINVAL; - goto out; - } - if (obj->pages && - obj->tiling_mode != I915_TILING_NONE && + i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->madv == I915_MADV_WILLNEED) i915_gem_object_unpin_pages(obj); @@ -4739,8 +3903,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, args->retained = obj->madv != __I915_MADV_PURGED; -out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -4753,7 +3916,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_list); for (i = 0; i < I915_NUM_ENGINES; i++) - INIT_LIST_HEAD(&obj->engine_list[i]); + init_request_active(&obj->last_read[i], + i915_gem_object_retire__read); + init_request_active(&obj->last_write, + i915_gem_object_retire__write); + init_request_active(&obj->last_fence, NULL); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4865,33 +4032,31 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) trace_i915_gem_object_destroy(obj); + /* All file-owned VMA should have been released by this point through + * i915_gem_close_object(), or earlier by i915_gem_context_close(). + * However, the object may also be bound into the global GTT (e.g. + * older GPUs without per-process support, or for direct access through + * the GTT either for the user or for scanout). Those VMA still need to + * unbound now. + */ list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { - int ret; - - vma->pin_count = 0; - ret = i915_vma_unbind(vma); - if (WARN_ON(ret == -ERESTARTSYS)) { - bool was_interruptible; - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - WARN_ON(i915_vma_unbind(vma)); - - dev_priv->mm.interruptible = was_interruptible; - } + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + vma->flags &= ~I915_VMA_PIN_MASK; + i915_vma_close(vma); } + GEM_BUG_ON(obj->bind_count); /* Stolen objects don't hold a ref, but do hold pin count. Fix that up * before progressing. */ if (obj->stolen) i915_gem_object_unpin_pages(obj); - WARN_ON(obj->frontbuffer_bits); + WARN_ON(atomic_read(&obj->frontbuffer_bits)); if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && - obj->tiling_mode != I915_TILING_NONE) + i915_gem_object_is_tiled(obj)) i915_gem_object_unpin_pages(obj); if (WARN_ON(obj->pages_pin_count)) @@ -4899,7 +4064,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (discard_backing_storage(obj)) obj->madv = I915_MADV_DONTNEED; i915_gem_object_put_pages(obj); - i915_gem_object_free_mmap_offset(obj); BUG_ON(obj->pages); @@ -4938,51 +4102,39 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, GEM_BUG_ON(!view); list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma; return NULL; } -void i915_gem_vma_destroy(struct i915_vma *vma) -{ - WARN_ON(vma->node.allocated); - - /* Keep the vma as a placeholder in the execbuffer reservation lists */ - if (!list_empty(&vma->exec_list)) - return; - - if (!vma->is_ggtt) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - list_del(&vma->obj_link); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); -} - -static void -i915_gem_stop_engines(struct drm_device *dev) +int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; + int ret; - for_each_engine(engine, dev_priv) - dev_priv->gt.stop_engine(engine); -} - -int -i915_gem_suspend(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); - ret = i915_gem_wait_for_idle(dev_priv); + + /* We have to flush all the executing contexts to main memory so + * that they can saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the dev_priv->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err; + + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto err; i915_gem_retire_requests(dev_priv); - i915_gem_stop_engines(dev); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -5002,6 +4154,23 @@ err: return ret; } +void i915_gem_resume(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + mutex_lock(&dev->struct_mutex); + i915_gem_restore_gtt_mappings(dev); + + /* As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + if (i915.enable_execlists) + intel_lr_context_reset(dev_priv, dev_priv->kernel_context); + + mutex_unlock(&dev->struct_mutex); +} + void i915_gem_init_swizzling(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -5054,53 +4223,6 @@ static void init_unused_rings(struct drm_device *dev) } } -int i915_gem_init_engines(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - ret = intel_init_render_ring_buffer(dev); - if (ret) - return ret; - - if (HAS_BSD(dev)) { - ret = intel_init_bsd_ring_buffer(dev); - if (ret) - goto cleanup_render_ring; - } - - if (HAS_BLT(dev)) { - ret = intel_init_blt_ring_buffer(dev); - if (ret) - goto cleanup_bsd_ring; - } - - if (HAS_VEBOX(dev)) { - ret = intel_init_vebox_ring_buffer(dev); - if (ret) - goto cleanup_blt_ring; - } - - if (HAS_BSD2(dev)) { - ret = intel_init_bsd2_ring_buffer(dev); - if (ret) - goto cleanup_vebox_ring; - } - - return 0; - -cleanup_vebox_ring: - intel_cleanup_engine(&dev_priv->engine[VECS]); -cleanup_blt_ring: - intel_cleanup_engine(&dev_priv->engine[BCS]); -cleanup_bsd_ring: - intel_cleanup_engine(&dev_priv->engine[VCS]); -cleanup_render_ring: - intel_cleanup_engine(&dev_priv->engine[RCS]); - - return ret; -} - int i915_gem_init_hw(struct drm_device *dev) { @@ -5167,6 +4289,27 @@ out: return ret; } +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) +{ + if (INTEL_INFO(dev_priv)->gen < 6) + return false; + + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + + if (value >= 0) + return value; + +#ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + return false; +#endif + + return true; +} + int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -5175,15 +4318,9 @@ int i915_gem_init(struct drm_device *dev) mutex_lock(&dev->struct_mutex); if (!i915.enable_execlists) { - dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; - dev_priv->gt.init_engines = i915_gem_init_engines; - dev_priv->gt.cleanup_engine = intel_cleanup_engine; - dev_priv->gt.stop_engine = intel_stop_engine; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } else { - dev_priv->gt.execbuf_submit = intel_execlists_submission; - dev_priv->gt.init_engines = intel_logical_rings_init; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; - dev_priv->gt.stop_engine = intel_logical_ring_stop; } /* This is just a security blanket to placate dragons. @@ -5195,19 +4332,22 @@ int i915_gem_init(struct drm_device *dev) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); i915_gem_init_userptr(dev_priv); - i915_gem_init_ggtt(dev); + + ret = i915_gem_init_ggtt(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_context_init(dev); if (ret) goto out_unlock; - ret = dev_priv->gt.init_engines(dev); + ret = intel_engines_init(dev); if (ret) goto out_unlock; ret = i915_gem_init_hw(dev); if (ret == -EIO) { - /* Allow ring initialisation to fail by marking the GPU as + /* Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ @@ -5236,7 +4376,6 @@ i915_gem_cleanup_engines(struct drm_device *dev) static void init_engine_lists(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); } @@ -5283,10 +4422,11 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->requests = kmem_cache_create("i915_gem_request", sizeof(struct drm_i915_gem_request), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, NULL); - INIT_LIST_HEAD(&dev_priv->vm_list); INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); @@ -5310,7 +4450,7 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->mm.interruptible = true; - mutex_init(&dev_priv->fb_tracking.lock); + spin_lock_init(&dev_priv->fb_tracking.lock); } void i915_gem_load_cleanup(struct drm_device *dev) @@ -5320,6 +4460,9 @@ void i915_gem_load_cleanup(struct drm_device *dev) kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); + + /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ + rcu_barrier(); } int i915_gem_freeze_late(struct drm_i915_private *dev_priv) @@ -5353,21 +4496,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone * file_priv. */ spin_lock(&file_priv->mm.lock); - while (!list_empty(&file_priv->mm.request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&file_priv->mm.request_list, - struct drm_i915_gem_request, - client_list); - list_del(&request->client_list); + list_for_each_entry(request, &file_priv->mm.request_list, client_list) request->file_priv = NULL; - } spin_unlock(&file_priv->mm.lock); if (!list_empty(&file_priv->rps.link)) { @@ -5396,7 +4533,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); - file_priv->bsd_ring = -1; + file_priv->bsd_engine = -1; ret = i915_gem_context_open(dev, file); if (ret) @@ -5418,16 +4555,23 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits) { + /* Control of individual bits within the mask are guarded by + * the owning plane->mutex, i.e. we can never see concurrent + * manipulation of individual bits. But since the bitfield as a whole + * is updated using RMW, we need to use atomics in order to update + * the bits. + */ + BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > + sizeof(atomic_t) * BITS_PER_BYTE); + if (old) { - WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); - WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); - old->frontbuffer_bits &= ~frontbuffer_bits; + WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); + atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); } if (new) { - WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); - WARN_ON(new->frontbuffer_bits & frontbuffer_bits); - new->frontbuffer_bits |= frontbuffer_bits; + WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); + atomic_or(frontbuffer_bits, &new->frontbuffer_bits); } } @@ -5441,7 +4585,7 @@ u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm) @@ -5459,7 +4603,8 @@ u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma->node.start; WARN(1, "global vma for this object not found. (view=%u)\n", view->type); @@ -5472,7 +4617,7 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) @@ -5488,7 +4633,7 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && i915_ggtt_view_equal(&vma->ggtt_view, view) && drm_mm_node_allocated(&vma->node)) return true; @@ -5496,17 +4641,6 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, return false; } -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (drm_mm_node_allocated(&vma->node)) - return true; - - return false; -} - unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) { struct i915_vma *vma; @@ -5514,7 +4648,7 @@ unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) GEM_BUG_ON(list_empty(&o->vma_list)); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) return vma->node.size; } @@ -5526,7 +4660,7 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) return true; return false; @@ -5584,6 +4718,6 @@ i915_gem_object_create_from_data(struct drm_device *dev, return obj; fail: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 3752d5daa4b2..ed989596d9a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -41,15 +41,15 @@ /** * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @dev: the drm device + * @engine: the associated request submission engine * @pool: the batch buffer pool */ -void i915_gem_batch_pool_init(struct drm_device *dev, +void i915_gem_batch_pool_init(struct intel_engine_cs *engine, struct i915_gem_batch_pool *pool) { int n; - pool->dev = dev; + pool->engine = engine; for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); @@ -65,18 +65,17 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) { int n; - WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); + lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - while (!list_empty(&pool->cache_list[n])) { - struct drm_i915_gem_object *obj = - list_first_entry(&pool->cache_list[n], - struct drm_i915_gem_object, - batch_pool_link); - - list_del(&obj->batch_pool_link); - drm_gem_object_unreference(&obj->base); - } + struct drm_i915_gem_object *obj, *next; + + list_for_each_entry_safe(obj, next, + &pool->cache_list[n], + batch_pool_link) + i915_gem_object_put(obj); + + INIT_LIST_HEAD(&pool->cache_list[n]); } } @@ -102,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, struct list_head *list; int n; - WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); + lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); /* Compute a power-of-two bucket, but throw everything greater than * 16KiB into the same bucket: i.e. the the buckets hold objects of @@ -115,13 +114,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list_for_each_entry_safe(tmp, next, list, batch_pool_link) { /* The batches are strictly LRU ordered */ - if (tmp->active) + if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id], + &tmp->base.dev->struct_mutex)) break; /* While we're looping, do some clean up */ if (tmp->madv == __I915_MADV_PURGED) { list_del(&tmp->batch_pool_link); - drm_gem_object_unreference(&tmp->base); + i915_gem_object_put(tmp); continue; } @@ -134,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (obj == NULL) { int ret; - obj = i915_gem_object_create(pool->dev, size); + obj = i915_gem_object_create(&pool->engine->i915->drm, size); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 848e90703eed..10d5ac4c00d3 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -27,13 +27,15 @@ #include "i915_drv.h" +struct intel_engine_cs; + struct i915_gem_batch_pool { - struct drm_device *dev; + struct intel_engine_cs *engine; struct list_head cache_list[4]; }; /* i915_gem_batch_pool.c */ -void i915_gem_batch_pool_init(struct drm_device *dev, +void i915_gem_batch_pool_init(struct intel_engine_cs *engine, struct i915_gem_batch_pool *pool); void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); struct drm_i915_gem_object* diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3c97f0e7a003..bb72af5320b0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private *dev_priv) return ret; } -static void i915_gem_context_clean(struct i915_gem_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - struct i915_vma *vma, *next; - - if (!ppgtt) - return; - - list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list, - vm_link) { - if (WARN_ON(__i915_vma_unbind_no_wait(vma))) - break; - } -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -156,13 +141,7 @@ void i915_gem_context_free(struct kref *ctx_ref) lockdep_assert_held(&ctx->i915->drm.struct_mutex); trace_i915_context_free(ctx); - - /* - * This context is going away and we need to remove all VMAs still - * around. This is to handle imported shared objects for which - * destructor did not run when their handles were closed. - */ - i915_gem_context_clean(ctx); + GEM_BUG_ON(!ctx->closed); i915_ppgtt_put(ctx->ppgtt); @@ -173,10 +152,10 @@ void i915_gem_context_free(struct kref *ctx_ref) continue; WARN_ON(ce->pin_count); - if (ce->ringbuf) - intel_ringbuffer_free(ce->ringbuf); + if (ce->ring) + intel_ring_free(ce->ring); - drm_gem_object_unreference(&ce->state->base); + i915_gem_object_put(ce->state); } list_del(&ctx->link); @@ -216,7 +195,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); /* Failure shouldn't ever happen this early */ if (WARN_ON(ret)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } } @@ -224,6 +203,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } +static void i915_ppgtt_close(struct i915_address_space *vm) +{ + struct list_head *phases[] = { + &vm->active_list, + &vm->inactive_list, + &vm->unbound_list, + NULL, + }, **phase; + + GEM_BUG_ON(vm->closed); + vm->closed = true; + + for (phase = phases; *phase; phase++) { + struct i915_vma *vma, *vn; + + list_for_each_entry_safe(vma, vn, *phase, vm_link) + if (!i915_vma_is_closed(vma)) + i915_vma_close(vma); + } +} + +static void context_close(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(ctx->closed); + ctx->closed = true; + if (ctx->ppgtt) + i915_ppgtt_close(&ctx->ppgtt->base); + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_put(ctx); +} + static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; @@ -305,7 +315,7 @@ __create_hw_context(struct drm_device *dev, return ctx; err_out: - i915_gem_context_unreference(ctx); + context_close(ctx); return ERR_PTR(ret); } @@ -327,13 +337,14 @@ i915_gem_create_context(struct drm_device *dev, return ctx; if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); + struct i915_hw_ppgtt *ppgtt = + i915_ppgtt_create(to_i915(dev), file_priv); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + context_close(ctx); return ERR_CAST(ppgtt); } @@ -390,7 +401,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx, if (ce->state) i915_gem_object_ggtt_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } } @@ -504,7 +515,7 @@ void i915_gem_context_fini(struct drm_device *dev) lockdep_assert_held(&dev->struct_mutex); - i915_gem_context_unreference(dctx); + context_close(dctx); dev_priv->kernel_context = NULL; ida_destroy(&dev_priv->context_hw_ida); @@ -514,8 +525,7 @@ static int context_idr_cleanup(int id, void *p, void *data) { struct i915_gem_context *ctx = p; - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_unreference(ctx); + context_close(ctx); return 0; } @@ -552,11 +562,12 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ - i915_semaphore_is_enabled(dev_priv) ? + i915.semaphores ? hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : 0; int len, ret; @@ -567,7 +578,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. */ if (IS_GEN6(dev_priv)) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } @@ -589,64 +600,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; - intel_ring_emit_reg(engine, + intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(engine, + intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } } } - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_SET_CONTEXT); - intel_ring_emit(engine, + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } /* Insert a delay before the next switch! */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, engine->scratch.gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, engine->scratch.gtt_offset); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); } - intel_ring_advance(engine); + intel_ring_advance(ring); return ret; } @@ -654,7 +665,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int i, ret; if (!remap_info) @@ -669,13 +680,13 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice) * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); - intel_ring_emit(engine, remap_info[i]); + intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); + intel_ring_emit(ring, remap_info[i]); } - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -752,9 +763,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; /* Trying to pin first makes error handling easier. */ - ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state, - to->ggtt_alignment, - 0); + ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0, + to->ggtt_alignment, 0); if (ret) return ret; @@ -814,8 +824,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req); + struct drm_i915_gem_object *obj = from->engine[RCS].state; + /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -823,14 +833,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. */ - from->engine[RCS].state->dirty = 1; + obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); /* obj is kept alive until the next request by its active ref */ - i915_gem_object_ggtt_unpin(from->engine[RCS].state); - i915_gem_context_unreference(from); + i915_gem_object_ggtt_unpin(obj); + i915_gem_context_put(from); } - i915_gem_context_reference(to); - engine->last_context = to; + engine->last_context = i915_gem_context_get(to); /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. @@ -894,8 +904,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - WARN_ON(i915.enable_execlists); lockdep_assert_held(&req->i915->drm.struct_mutex); + if (i915.enable_execlists) + return 0; if (!req->ctx->engine[engine->id].state) { struct i915_gem_context *to = req->ctx; @@ -914,10 +925,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) } if (to != engine->last_context) { - i915_gem_context_reference(to); if (engine->last_context) - i915_gem_context_unreference(engine->last_context); - engine->last_context = to; + i915_gem_context_put(engine->last_context); + engine->last_context = i915_gem_context_get(to); } return 0; @@ -926,6 +936,33 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) { + struct drm_i915_gem_request *req; + int ret; + + if (engine->last_context == NULL) + continue; + + if (engine->last_context == dev_priv->kernel_context) + continue; + + req = i915_gem_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(req)) + return PTR_ERR(req); + + ret = i915_switch_context(req); + i915_add_request_no_flush(req); + if (ret) + return ret; + } + + return 0; +} + static bool contexts_enabled(struct drm_device *dev) { return i915.enable_execlists || to_i915(dev)->hw_context_size; @@ -985,7 +1022,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, } idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + context_close(ctx); mutex_unlock(&dev->struct_mutex); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c deleted file mode 100644 index a56516482394..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Keith Packard <keithp@keithp.com> - * - */ - -#include <drm/drmP.h> -#include <drm/i915_drm.h> -#include "i915_drv.h" - -#if WATCH_LISTS -int -i915_verify_lists(struct drm_device *dev) -{ - static int warned; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - int err = 0; - - if (warned) - return 0; - - for_each_engine(engine, dev_priv) { - list_for_each_entry(obj, &engine->active_list, - engine_list[engine->id]) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("%s: freed active obj %p\n", - engine->name, obj); - err++; - break; - } else if (!obj->active || - obj->last_read_req[engine->id] == NULL) { - DRM_ERROR("%s: invalid active obj %p\n", - engine->name, obj); - err++; - } else if (obj->base.write_domain) { - DRM_ERROR("%s: invalid write obj %p (w %x)\n", - engine->name, - obj, obj->base.write_domain); - err++; - } - } - } - - return warned = err; -} -#endif /* WATCH_LIST */ diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 80bbe43a2e92..c60a8d5bbad0 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -23,9 +23,13 @@ * Authors: * Dave Airlie <airlied@redhat.com> */ + +#include <linux/dma-buf.h> +#include <linux/reservation.h> + #include <drm/drmP.h> + #include "i915_drv.h" -#include <linux/dma-buf.h> static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { @@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; +static void export_fences(struct drm_i915_gem_object *obj, + struct dma_buf *dma_buf) +{ + struct reservation_object *resv = dma_buf->resv; + struct drm_i915_gem_request *req; + unsigned long active; + int idx; + + active = __I915_BO_ACTIVE(obj); + if (!active) + return; + + /* Serialise with execbuf to prevent concurrent fence-loops */ + mutex_lock(&obj->base.dev->struct_mutex); + + /* Mark the object for future fences before racily adding old fences */ + obj->base.dma_buf = dma_buf; + + ww_mutex_lock(&resv->lock, NULL); + + for_each_active(active, idx) { + req = i915_gem_active_get(&obj->last_read[idx], + &obj->base.dev->struct_mutex); + if (!req) + continue; + + if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + + i915_gem_request_put(req); + } + + req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); + if (req) { + reservation_object_add_excl_fence(resv, &req->fence); + i915_gem_request_put(req); + } + + ww_mutex_unlock(&resv->lock); + mutex_unlock(&obj->base.dev->struct_mutex); +} + struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dma_buf; exp_info.ops = &i915_dmabuf_ops; exp_info.size = gem_obj->size; exp_info.flags = flags; exp_info.priv = gem_obj; - if (obj->ops->dmabuf_export) { int ret = obj->ops->dmabuf_export(obj); if (ret) return ERR_PTR(ret); } - return dma_buf_export(&exp_info); + dma_buf = dma_buf_export(&exp_info); + if (IS_ERR(dma_buf)) + return dma_buf; + + export_fences(obj, dma_buf); + return dma_buf; } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -278,8 +330,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, * Importing dmabuf exported from out own gem increases * refcount on gem itself instead of f_count of dmabuf. */ - drm_gem_object_reference(&obj->base); - return &obj->base; + return &i915_gem_object_get(obj)->base; } } @@ -300,6 +351,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; + /* We use GTT as shorthand for a coherent domain, one that is + * neither in the GPU cache nor in the CPU cache, where all + * writes are immediately visible in memory. (That's not strictly + * true, but it's close! There are internal buffers such as the + * write-combined buffer or a delay through the chipset for GTT + * writes that do require us to treat GTT as a separate cache domain.) + */ + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + obj->base.write_domain = 0; + return &obj->base; fail_detach: diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 3c1280ec7ff6..f76c06e92677 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,41 +33,23 @@ #include "intel_drv.h" #include "i915_trace.h" -static int switch_to_pinned_context(struct drm_i915_private *dev_priv) +static bool +gpu_is_idle(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - if (i915.enable_execlists) - return 0; - for_each_engine(engine, dev_priv) { - struct drm_i915_gem_request *req; - int ret; - - if (engine->last_context == NULL) - continue; - - if (engine->last_context == dev_priv->kernel_context) - continue; - - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); - - ret = i915_switch_context(req); - i915_add_request_no_flush(req); - if (ret) - return ret; + if (intel_engine_is_active(engine)) + return false; } - return 0; + return true; } - static bool mark_free(struct i915_vma *vma, struct list_head *unwind) { - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return false; if (WARN_ON(!list_empty(&vma->exec_list))) @@ -79,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) /** * i915_gem_evict_something - Evict vmas to make room for binding a new one - * @dev: drm_device * @vm: address space to evict from * @min_size: size of the desired free space * @alignment: alignment constraint of the desired free space @@ -102,42 +83,37 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) * memory in e.g. the shrinker. */ int -i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, - int min_size, unsigned alignment, unsigned cache_level, - unsigned long start, unsigned long end, +i915_gem_evict_something(struct i915_address_space *vm, + u64 min_size, u64 alignment, + unsigned cache_level, + u64 start, u64 end, unsigned flags) { - struct list_head eviction_list, unwind_list; - struct i915_vma *vma; - int ret = 0; - int pass = 0; + struct drm_i915_private *dev_priv = to_i915(vm->dev); + struct list_head eviction_list; + struct list_head *phases[] = { + &vm->inactive_list, + &vm->active_list, + NULL, + }, **phase; + struct i915_vma *vma, *next; + int ret; - trace_i915_gem_evict(dev, min_size, alignment, flags); + trace_i915_gem_evict(vm, min_size, alignment, flags); /* * The goal is to evict objects and amalgamate space in LRU order. * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those on the (per - * ring) active list that do not have an outstanding flush. Once the - * hardware reports completion (the seqno is updated after the - * batchbuffer has been finished) the clean buffer objects would - * be retired to the inactive list. Any dirty objects would be added - * to the tail of the flushing list. So after processing the clean - * active objects we need to emit a MI_FLUSH to retire the flushing - * list, hence the retirement order of the flushing list is in - * advance of the dirty objects on the active lists. + * retirement order. The next objects to retire are those in flight, + * on the active list, again in retirement order. * * The retirement sequence is thus: * 1. Inactive objects (already retired) - * 2. Clean active objects - * 3. Flushing list - * 4. Dirty active objects. + * 2. Active objects (will stall on unbinding) * * On each list, the oldest objects lie at the HEAD with the freshest * object on the TAIL. */ - - INIT_LIST_HEAD(&unwind_list); if (start != 0 || end != vm->total) { drm_mm_init_scan_with_range(&vm->mm, min_size, alignment, cache_level, @@ -145,96 +121,84 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, } else drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); -search_again: - /* First see if there is a large enough contiguous idle region... */ - list_for_each_entry(vma, &vm->inactive_list, vm_link) { - if (mark_free(vma, &unwind_list)) - goto found; - } - if (flags & PIN_NONBLOCK) - goto none; + phases[1] = NULL; - /* Now merge in the soon-to-be-expired objects... */ - list_for_each_entry(vma, &vm->active_list, vm_link) { - if (mark_free(vma, &unwind_list)) - goto found; - } +search_again: + INIT_LIST_HEAD(&eviction_list); + phase = phases; + do { + list_for_each_entry(vma, *phase, vm_link) + if (mark_free(vma, &eviction_list)) + goto found; + } while (*++phase); -none: /* Nothing found, clean up and bail out! */ - while (!list_empty(&unwind_list)) { - vma = list_first_entry(&unwind_list, - struct i915_vma, - exec_list); + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { ret = drm_mm_scan_remove_block(&vma->node); BUG_ON(ret); - list_del_init(&vma->exec_list); + INIT_LIST_HEAD(&vma->exec_list); } /* Can we unpin some objects such as idle hw contents, - * or pending flips? + * or pending flips? But since only the GGTT has global entries + * such as scanouts, rinbuffers and contexts, we can skip the + * purge when inspecting per-process local address spaces. */ - if (flags & PIN_NONBLOCK) + if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - /* Only idle the GPU and repeat the search once */ - if (pass++ == 0) { - struct drm_i915_private *dev_priv = to_i915(dev); - - if (i915_is_ggtt(vm)) { - ret = switch_to_pinned_context(dev_priv); - if (ret) - return ret; - } - - ret = i915_gem_wait_for_idle(dev_priv); - if (ret) - return ret; - - i915_gem_retire_requests(dev_priv); - goto search_again; + if (gpu_is_idle(dev_priv)) { + /* If we still have pending pageflip completions, drop + * back to userspace to give our workqueues time to + * acquire our locks and unpin the old scanouts. + */ + return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC; } - /* If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. + /* Not everything in the GGTT is tracked via vma (otherwise we + * could evict as required with minimal stalling) so we are forced + * to idle the GPU and explicitly retire outstanding requests in + * the hopes that we can then remove contexts and the like only + * bound by their active reference. */ - return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC; + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + return ret; + + ret = i915_gem_wait_for_idle(dev_priv, true); + if (ret) + return ret; + + i915_gem_retire_requests(dev_priv); + goto search_again; found: /* drm_mm doesn't allow any other other operations while - * scanning, therefore store to be evicted objects on a - * temporary list. */ - INIT_LIST_HEAD(&eviction_list); - while (!list_empty(&unwind_list)) { - vma = list_first_entry(&unwind_list, - struct i915_vma, - exec_list); - if (drm_mm_scan_remove_block(&vma->node)) { - list_move(&vma->exec_list, &eviction_list); - drm_gem_object_reference(&vma->obj->base); - continue; - } - list_del_init(&vma->exec_list); + * scanning, therefore store to-be-evicted objects on a + * temporary list and take a reference for all before + * calling unbind (which may remove the active reference + * of any of our objects, thus corrupting the list). + */ + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + if (drm_mm_scan_remove_block(&vma->node)) + __i915_vma_pin(vma); + else + list_del_init(&vma->exec_list); } /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - struct drm_gem_object *obj; vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); - obj = &vma->obj->base; list_del_init(&vma->exec_list); + __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); - - drm_gem_object_unreference(obj); } - return ret; } @@ -256,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target) vma = container_of(node, typeof(*vma), node); - if (vma->pin_count) { - if (!vma->exec_entry || (vma->pin_count > 1)) + if (i915_vma_is_pinned(vma)) { + if (!vma->exec_entry || i915_vma_pin_count(vma) > 1) /* Object is pinned for some other use */ return -EBUSY; @@ -303,22 +267,21 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) struct drm_i915_private *dev_priv = to_i915(vm->dev); if (i915_is_ggtt(vm)) { - ret = switch_to_pinned_context(dev_priv); + ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) return ret; } - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) return ret; i915_gem_retire_requests(dev_priv); - WARN_ON(!list_empty(&vm->active_list)); } list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link) - if (vma->pin_count == 0) + if (!i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633e7549..c494b79ded20 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,21 +26,38 @@ * */ +#include <linux/dma_remapping.h> +#include <linux/reservation.h> +#include <linux/uaccess.h> + #include <drm/drmP.h> #include <drm/i915_drm.h> + #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" -#include <linux/dma_remapping.h> -#include <linux/uaccess.h> +#include "intel_frontbuffer.h" -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + struct i915_vma *batch; + u32 dispatch_flags; + u32 args_batch_start_offset; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct drm_i915_gem_request *request; +}; + struct eb_vmas { struct list_head vmas; int and; @@ -89,6 +106,26 @@ eb_reset(struct eb_vmas *eb) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } +static struct i915_vma * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma; +} + static int eb_lookup_vmas(struct eb_vmas *eb, struct drm_i915_gem_exec_object2 *exec, @@ -122,7 +159,7 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); list_add_tail(&obj->obj_exec_link, &objects); } spin_unlock(&file->table_lock); @@ -175,7 +212,7 @@ err: struct drm_i915_gem_object, obj_exec_link); list_del_init(&obj->obj_exec_link); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } /* * Objects already transfered to the vmas list will be unreferenced by @@ -219,7 +256,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) i915_gem_object_unpin_fence(obj); if (entry->flags & __EXEC_OBJECT_HAS_PIN) - vma->pin_count--; + __i915_vma_unpin(vma); entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } @@ -234,7 +271,7 @@ static void eb_destroy(struct eb_vmas *eb) exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_gem_object_put(vma->obj); } kfree(eb); } @@ -399,6 +436,20 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj, return 0; } +static bool object_is_idle(struct drm_i915_gem_object *obj) +{ + unsigned long active = i915_gem_object_get_active(obj); + int idx; + + for_each_active(active, idx) { + if (!i915_gem_active_is_idle(&obj->last_read[idx], + &obj->base.dev->struct_mutex)) + return false; + } + + return true; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -482,7 +533,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && pagefault_disabled()) + if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; if (use_cpu_reloc(obj)) @@ -626,12 +677,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); - if ((ret == -ENOSPC || ret == -E2BIG) && + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags); + if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) - ret = i915_gem_object_pin(obj, vma->vm, - entry->alignment, - flags & ~PIN_MAPPABLE); + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags & ~PIN_MAPPABLE); if (ret) return ret; @@ -667,7 +722,7 @@ need_reloc_mappable(struct i915_vma *vma) if (entry->relocation_count == 0) return false; - if (!vma->is_ggtt) + if (!i915_vma_is_ggtt(vma)) return false; /* See also use_cpu_reloc() */ @@ -686,12 +741,16 @@ eb_vma_misplaced(struct i915_vma *vma) struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; - WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_ggtt(vma)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -725,8 +784,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; int retry; - i915_gem_retire_requests_ring(engine); - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); @@ -746,7 +803,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) @@ -843,7 +900,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_gem_object_put(vma->obj); } mutex_unlock(&dev->struct_mutex); @@ -937,11 +994,21 @@ err: return ret; } +static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +{ + unsigned int mask; + + mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; + mask <<= I915_BO_ACTIVE_SHIFT; + + return mask; +} + static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned other_rings = ~intel_engine_flag(req->engine); + const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -950,8 +1017,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); + if (obj->flags & other_rings) { + ret = i915_gem_object_sync(obj, req); if (ret) return ret; } @@ -968,10 +1035,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return intel_ring_invalidate_all_caches(req); + /* Unconditionally invalidate GPU caches and TLBs. */ + return req->engine->emit_flush(req, EMIT_INVALIDATE); } static bool @@ -1007,6 +1072,9 @@ validate_exec_list(struct drm_device *dev, unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -1036,6 +1104,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. @@ -1086,66 +1162,106 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -void +void i915_vma_move_to_active(struct i915_vma *vma, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned int idx = req->engine->id; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + + obj->dirty = 1; /* be paranoid */ + + /* Add a reference if we're newly entering the active list. + * The order in which we add operations to the retirement queue is + * vital here: mark_active adds to the start of the callback list, + * such that subsequent callbacks are called first. Therefore we + * add the active reference first and queue for it to be dropped + * *last*. + */ + if (!i915_gem_object_is_active(obj)) + i915_gem_object_get(obj); + i915_gem_object_set_active(obj, idx); + i915_gem_active_set(&obj->last_read[idx], req); + + if (flags & EXEC_OBJECT_WRITE) { + i915_gem_active_set(&obj->last_write, req); + + intel_fb_obj_invalidate(obj, ORIGIN_CS); + + /* update for the implicit flush after a batch */ + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) { + i915_gem_active_set(&obj->last_fence, req); + if (flags & __EXEC_OBJECT_HAS_FENCE) { + struct drm_i915_private *dev_priv = req->i915; + + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, + &dev_priv->mm.fence_list); + } + } + + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); +} + +static void eb_export_fence(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct reservation_object *resv; + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (!resv) + return; + + /* Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. + */ + ww_mutex_lock(&resv->lock, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &req->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + ww_mutex_unlock(&resv->lock); +} + +static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = i915_gem_request_get_engine(req); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain == 0) + if (obj->base.write_domain) + vma->exec_entry->flags |= EXEC_OBJECT_WRITE; + else obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req); - if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); - - intel_fb_obj_invalidate(obj, ORIGIN_CS); - - /* update for the implicit flush after a batch */ - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - } - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { - struct drm_i915_private *dev_priv = engine->i915; - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, - &dev_priv->mm.fence_list); - } - } - + i915_vma_move_to_active(vma, req, vma->exec_entry->flags); + eb_export_fence(obj, req, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } -static void -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) -{ - /* Unconditionally force add_request to emit a full flush. */ - params->engine->gpu_caches_dirty = true; - - /* Add a breadcrumb for the completion of the batch buffer */ - __i915_add_request(params->request, params->batch_obj, true); -} - static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_ring *ring = req->ring; int ret, i; - if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { + if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -1155,21 +1271,21 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return ret; for (i = 0; i < 4; i++) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(engine, 0); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); + intel_ring_emit(ring, 0); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } -static struct drm_i915_gem_object* +static struct i915_vma* i915_gem_execbuffer_parse(struct intel_engine_cs *engine, struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct eb_vmas *eb, struct drm_i915_gem_object *batch_obj, + struct eb_vmas *eb, u32 batch_start_offset, u32 batch_len, bool is_master) @@ -1181,18 +1297,18 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, PAGE_ALIGN(batch_len)); if (IS_ERR(shadow_batch_obj)) - return shadow_batch_obj; - - ret = i915_parse_cmds(engine, - batch_obj, - shadow_batch_obj, - batch_start_offset, - batch_len, - is_master); + return ERR_CAST(shadow_batch_obj); + + ret = intel_engine_cmd_parser(engine, + batch_obj, + shadow_batch_obj, + batch_start_offset, + batch_len, + is_master); if (ret) goto err; - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); + ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); if (ret) goto err; @@ -1203,29 +1319,25 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; - drm_gem_object_reference(&shadow_batch_obj->base); + i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); - shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; - - return shadow_batch_obj; + return vma; err: i915_gem_object_unpin_pages(shadow_batch_obj); if (ret == -EACCES) /* unhandled chained batch */ - return batch_obj; + return NULL; else return ERR_PTR(ret); } -int -i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +static int +execbuf_submit(struct i915_execbuffer_params *params, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas) { - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; u32 instp_mask; @@ -1239,34 +1351,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (ret) return ret; - WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<engine->id), - "%s didn't clear reload\n", engine->name); - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { + if (instp_mode != 0 && params->engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev)->gen < 4) { + if (INTEL_INFO(dev_priv)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); return -EINVAL; } - if (INTEL_INFO(dev)->gen > 5 && + if (INTEL_INFO(dev_priv)->gen > 5 && instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev_priv)->gen >= 6) instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; @@ -1275,37 +1384,39 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, return -EINVAL; } - if (engine == &dev_priv->engine[RCS] && + if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { + struct intel_ring *ring = params->request->ring; + ret = intel_ring_begin(params->request, 4); if (ret) return ret; - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, INSTPM); - intel_ring_emit(engine, instp_mask << 16 | instp_mode); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, INSTPM); + intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_advance(ring); dev_priv->relative_constants_mode = instp_mode; } if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(dev, params->request); + ret = i915_reset_gen7_sol_offsets(params->request); if (ret) return ret; } exec_len = args->batch_len; - exec_start = params->batch_obj_vm_offset + + exec_start = params->batch->node.start + params->args_batch_start_offset; if (exec_len == 0) - exec_len = params->batch_obj->base.size; + exec_len = params->batch->size; - ret = engine->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->engine->emit_bb_start(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; @@ -1318,43 +1429,24 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, /** * Find one BSD ring to dispatch the corresponding BSD command. - * The ring index is returned. + * The engine index is returned. */ static unsigned int -gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; /* Check whether the file_priv has already selected one ring. */ - if ((int)file_priv->bsd_ring < 0) { + if ((int)file_priv->bsd_engine < 0) { /* If not, use the ping-pong mechanism to select one. */ mutex_lock(&dev_priv->drm.struct_mutex); - file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; - dev_priv->mm.bsd_ring_dispatch_index ^= 1; + file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index; + dev_priv->mm.bsd_engine_dispatch_index ^= 1; mutex_unlock(&dev_priv->drm.struct_mutex); } - return file_priv->bsd_ring; -} - -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; + return file_priv->bsd_engine; } #define I915_USER_RINGS (4) @@ -1367,31 +1459,31 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return NULL; } if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -1399,20 +1491,20 @@ eb_select_ring(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return NULL; } - *ring = &dev_priv->engine[_VCS(bsd_idx)]; + engine = &dev_priv->engine[_VCS(bsd_idx)]; } else { - *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; + engine = &dev_priv->engine[user_ring_map[user_ring_id]]; } - if (!intel_engine_initialized(*ring)) { + if (!intel_engine_initialized(engine)) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } - return 0; + return engine; } static int @@ -1423,9 +1515,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *req = NULL; struct eb_vmas *eb; - struct drm_i915_gem_object *batch_obj; struct drm_i915_gem_exec_object2 shadow_exec_entry; struct intel_engine_cs *engine; struct i915_gem_context *ctx; @@ -1454,9 +1544,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) dispatch_flags |= I915_DISPATCH_PINNED; - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); @@ -1496,7 +1586,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); if (ctx->ppgtt) vm = &ctx->ppgtt->base; @@ -1507,7 +1597,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, eb = eb_create(args); if (eb == NULL) { - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; @@ -1519,7 +1609,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = eb_get_batch(eb); + params->batch = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; @@ -1543,34 +1633,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (batch_obj->base.pending_write_domain) { + if (params->batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } params->args_batch_start_offset = args->batch_start_offset; - if (i915_needs_cmd_parser(engine) && args->batch_len) { - struct drm_i915_gem_object *parsed_batch_obj; - - parsed_batch_obj = i915_gem_execbuffer_parse(engine, - &shadow_exec_entry, - eb, - batch_obj, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); - if (IS_ERR(parsed_batch_obj)) { - ret = PTR_ERR(parsed_batch_obj); + if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { + struct i915_vma *vma; + + vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, + params->batch->obj, + eb, + args->batch_start_offset, + args->batch_len, + drm_is_current_master(file)); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; } - /* - * parsed_batch_obj == batch_obj means batch not fully parsed: - * Accept, but don't promote to secure. - */ - - if (parsed_batch_obj != batch_obj) { + if (vma) { /* * Batch parsed and accepted: * @@ -1582,16 +1666,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ dispatch_flags |= I915_DISPATCH_SECURE; params->args_batch_start_offset = 0; - batch_obj = parsed_batch_obj; + params->batch = vma; } } - batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ if (dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = params->batch->obj; + /* * So on first glance it looks freaky that we pin the batch here * outside of the reservation loop. But: @@ -1602,22 +1688,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. * So this is actually safe. */ - ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (ret) goto err; - params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); - } else - params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); + params->batch = i915_gem_obj_to_ggtt(obj); + } /* Allocate a request for this batch buffer nice and early. */ - req = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(req)) { - ret = PTR_ERR(req); + params->request = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(params->request)) { + ret = PTR_ERR(params->request); goto err_batch_unpin; } - ret = i915_gem_request_add_to_client(req, file); + ret = i915_gem_request_add_to_client(params->request, file); if (ret) goto err_request; @@ -1631,13 +1716,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->file = file; params->engine = engine; params->dispatch_flags = dispatch_flags; - params->batch_obj = batch_obj; params->ctx = ctx; - params->request = req; - ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); + ret = execbuf_submit(params, args, &eb->vmas); err_request: - i915_gem_execbuffer_retire_commands(params); + __i915_add_request(params->request, params->batch->obj, ret == 0); err_batch_unpin: /* @@ -1647,11 +1730,10 @@ err_batch_unpin: * active. */ if (dispatch_flags & I915_DISPATCH_SECURE) - i915_gem_object_ggtt_unpin(batch_obj); - + i915_vma_unpin(params->batch); err: /* the request owns the ref now */ - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 251d7a95af89..9e8173fe2a09 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -86,20 +86,22 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); uint64_t val; /* Adjust fence size to match tiled area */ - if (obj->tiling_mode != I915_TILING_NONE) { - uint32_t row_size = obj->stride * - (obj->tiling_mode == I915_TILING_Y ? 32 : 8); + if (tiling != I915_TILING_NONE) { + uint32_t row_size = stride * + (tiling == I915_TILING_Y ? 32 : 8); size = (size / row_size) * row_size; } val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 0xfffff000) << 32; val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) + val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift; + if (tiling == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; @@ -122,6 +124,8 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); int pitch_val; int tile_width; @@ -131,17 +135,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) tile_width = 128; else tile_width = 512; /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; + pitch_val = stride / tile_width; pitch_val = ffs(pitch_val) - 1; val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) + if (tiling == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I915_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; @@ -161,6 +165,8 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); uint32_t pitch_val; WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || @@ -169,11 +175,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", i915_gem_obj_ggtt_offset(obj), size); - pitch_val = obj->stride / 128; + pitch_val = stride / 128; pitch_val = ffs(pitch_val) - 1; val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) + if (tiling == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I830_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; @@ -201,9 +207,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) mb(); - WARN(obj && (!obj->stride || !obj->tiling_mode), + WARN(obj && + (!i915_gem_object_get_stride(obj) || + !i915_gem_object_get_tiling(obj)), "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - obj->stride, obj->tiling_mode); + i915_gem_object_get_stride(obj), + i915_gem_object_get_tiling(obj)); if (IS_GEN2(dev)) i830_write_fence_reg(dev, reg, obj); @@ -248,7 +257,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { - if (obj->tiling_mode) + if (i915_gem_object_is_tiled(obj)) i915_gem_release_mmap(obj); /* As we do not have an associated fence register, we will force @@ -261,15 +270,8 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); - if (ret) - return ret; - - i915_gem_request_assign(&obj->last_fenced_req, NULL); - } - - return 0; + return i915_gem_active_retire(&obj->last_fence, + &obj->base.dev->struct_mutex); } /** @@ -368,7 +370,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - bool enable = obj->tiling_mode != I915_TILING_NONE; + bool enable = i915_gem_object_is_tiled(obj); struct drm_i915_fence_reg *reg; int ret; @@ -438,7 +440,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) WARN_ON(!ggtt_vma || dev_priv->fence_regs[obj->fence_reg].pin_count > - ggtt_vma->pin_count); + i915_vma_pin_count(ggtt_vma)); dev_priv->fence_regs[obj->fence_reg].pin_count++; return true; } else @@ -484,7 +486,7 @@ void i915_gem_restore_fences(struct drm_device *dev) */ if (reg->obj) { i915_gem_object_update_fence(reg->obj, reg, - reg->obj->tiling_mode); + i915_gem_object_get_tiling(reg->obj)); } else { i915_gem_write_fence(dev, i, NULL); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10f1e32767e6..18c7c9644761 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -184,7 +184,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) { vma->vm->clear_range(vma->vm, vma->node.start, - vma->obj->base.size, + vma->size, true); } @@ -669,6 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -678,13 +679,13 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(engine, upper_32_bits(addr)); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(engine, lower_32_bits(addr)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); + intel_ring_emit(ring, upper_32_bits(addr)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); + intel_ring_emit(ring, lower_32_bits(addr)); + intel_ring_advance(ring); return 0; } @@ -1660,11 +1661,12 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1672,13 +1674,13 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1686,11 +1688,12 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1698,17 +1701,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; } @@ -2009,7 +2012,7 @@ alloc: 0, ggtt->base.total, DRM_MM_TOPDOWN); if (ret == -ENOSPC && !retried) { - ret = i915_gem_evict_something(dev, &ggtt->base, + ret = i915_gem_evict_something(&ggtt->base, GEN6_PD_SIZE, GEN6_PD_ALIGN, I915_CACHE_NONE, 0, ggtt->base.total, @@ -2101,11 +2104,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv) { - ppgtt->base.dev = dev; + ppgtt->base.dev = &dev_priv->drm; - if (INTEL_INFO(dev)->gen < 8) + if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); else return gen8_ppgtt_init(ppgtt); @@ -2115,9 +2119,9 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv) { drm_mm_init(&vm->mm, vm->start, vm->total); - vm->dev = &dev_priv->drm; INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); + INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); } @@ -2140,15 +2144,17 @@ static void gtt_write_workarounds(struct drm_device *dev) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv, + struct drm_i915_file_private *file_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + int ret; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); i915_address_space_init(&ppgtt->base, dev_priv); + ppgtt->base.file = file_priv; } return ret; @@ -2180,7 +2186,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) } struct i915_hw_ppgtt * -i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +i915_ppgtt_create(struct drm_i915_private *dev_priv, + struct drm_i915_file_private *fpriv) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2189,14 +2196,12 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(dev, ppgtt); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } - ppgtt->file_priv = fpriv; - trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2209,9 +2214,10 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound */ + /* vmas should already be unbound and destroyed */ WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + WARN_ON(!list_empty(&ppgtt->base.unbound_list)); list_del(&ppgtt->base.global_link); drm_mm_takedown(&ppgtt->base.mm); @@ -2220,47 +2226,21 @@ void i915_ppgtt_release(struct kref *kref) kfree(ppgtt); } -extern int intel_iommu_gfx_mapped; /* Certain Gen5 chipsets require require idling the GPU before * unmapping anything from the GTT when VT-d is enabled. */ -static bool needs_idle_maps(struct drm_device *dev) +static bool needs_idle_maps(struct drm_i915_private *dev_priv) { #ifdef CONFIG_INTEL_IOMMU /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) + if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) return true; #endif return false; } -static bool do_idling(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool ret = dev_priv->mm.interruptible; - - if (unlikely(ggtt->do_idle_maps)) { - dev_priv->mm.interruptible = false; - if (i915_gem_wait_for_idle(dev_priv)) { - DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); - /* Wait a bit, in hopes it avoids the hang */ - udelay(10); - } - } - - return ret; -} - -static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - if (unlikely(ggtt->do_idle_maps)) - dev_priv->mm.interruptible = interruptible; -} - void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2647,7 +2627,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. */ - vma->bound |= GLOBAL_BIND | LOCAL_BIND; + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -2669,14 +2649,14 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, pte_flags |= PTE_READ_ONLY; - if (flags & GLOBAL_BIND) { + if (flags & I915_VMA_GLOBAL_BIND) { vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, vma->node.start, cache_level, pte_flags); } - if (flags & LOCAL_BIND) { + if (flags & I915_VMA_LOCAL_BIND) { struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, @@ -2690,42 +2670,36 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { - struct drm_device *dev = vma->vm->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj = vma->obj; - const uint64_t size = min_t(uint64_t, - obj->base.size, - vma->node.size); + struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + const u64 size = min(vma->size, vma->node.size); - if (vma->bound & GLOBAL_BIND) { + if (vma->flags & I915_VMA_GLOBAL_BIND) vma->vm->clear_range(vma->vm, - vma->node.start, - size, + vma->node.start, size, true); - } - - if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, - vma->node.start, - size, + vma->node.start, size, true); - } } void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - bool interruptible; + struct i915_ggtt *ggtt = &dev_priv->ggtt; - interruptible = do_idling(dev_priv); + if (unlikely(ggtt->do_idle_maps)) { + if (i915_gem_wait_for_idle(dev_priv, false)) { + DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); + /* Wait a bit, in hopes it avoids the hang */ + udelay(10); + } + } dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, PCI_DMA_BIDIRECTIONAL); - - undo_idling(dev_priv, interruptible); } static void i915_gtt_color_adjust(struct drm_mm_node *node, @@ -2736,19 +2710,14 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, if (node->color != color) *start += 4096; - if (!list_empty(&node->node_list)) { - node = list_entry(node->node_list.next, - struct drm_mm_node, - node_list); - if (node->allocated && node->color != color) - *end -= 4096; - } + node = list_first_entry_or_null(&node->node_list, + struct drm_mm_node, + node_list); + if (node && node->allocated && node->color != color) + *end -= 4096; } -static int i915_gem_setup_global_gtt(struct drm_device *dev, - u64 start, - u64 mappable_end, - u64 end) +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. * @@ -2759,48 +2728,15 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, * aperture. One page should be enough to keep any prefetching inside * of the aperture. */ - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_mm_node *entry; - struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + struct drm_mm_node *entry; int ret; - BUG_ON(mappable_end > end); - - ggtt->base.start = start; - - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm */ - ggtt->base.total = end - start - PAGE_SIZE; - i915_address_space_init(&ggtt->base, dev_priv); - ggtt->base.total += PAGE_SIZE; - ret = intel_vgt_balloon(dev_priv); if (ret) return ret; - if (!HAS_LLC(dev)) - ggtt->base.mm.color_adjust = i915_gtt_color_adjust; - - /* Mark any preallocated objects as occupied */ - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); - - DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", - i915_gem_obj_ggtt_offset(obj), obj->base.size); - - WARN_ON(i915_gem_obj_ggtt_bound(obj)); - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("Reservation failed: %i\n", ret); - return ret; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); - } - /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2810,18 +2746,19 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, } /* And finally clear the reserved guard page */ - ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); + ggtt->base.clear_range(&ggtt->base, + ggtt->base.total - PAGE_SIZE, PAGE_SIZE, + true); - if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); if (!ppgtt) return -ENOMEM; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { - ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); return ret; } @@ -2849,33 +2786,21 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, } /** - * i915_gem_init_ggtt - Initialize GEM for Global GTT - * @dev: DRM device - */ -void i915_gem_init_ggtt(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); -} - -/** * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization - * @dev: DRM device + * @dev_priv: i915 device */ -void i915_ggtt_cleanup_hw(struct drm_device *dev) +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev_priv->mm.aliasing_ppgtt) { struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); } - i915_gem_cleanup_stolen(dev); + i915_gem_cleanup_stolen(&dev_priv->drm); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); @@ -2885,6 +2810,9 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev) } ggtt->base.cleanup(&ggtt->base); + + arch_phys_wc_del(ggtt->mtrr); + io_mapping_free(ggtt->mappable); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -2965,17 +2893,14 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) return (gen9_gmch_ctl - 0xf0 + 1) << 22; } -static int ggtt_probe_common(struct drm_device *dev, - size_t gtt_size) +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct pci_dev *pdev = ggtt->base.dev->pdev; struct i915_page_scratch *scratch_page; - phys_addr_t ggtt_phys_addr; + phys_addr_t phys_addr; /* For Modern GENs the PTEs and register space are split in the BAR */ - ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + - (pci_resource_len(dev->pdev, 0) / 2); + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* * On BXT writes larger than 64 bit to the GTT pagetable range will be @@ -2984,16 +2909,16 @@ static int ggtt_probe_common(struct drm_device *dev, * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_BROXTON(dev)) - ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); + if (IS_BROXTON(ggtt->base.dev)) + ggtt->gsm = ioremap_nocache(phys_addr, size); else - ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); + ggtt->gsm = ioremap_wc(phys_addr, size); if (!ggtt->gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); + DRM_ERROR("Failed to map the ggtt page table\n"); return -ENOMEM; } - scratch_page = alloc_scratch_page(dev); + scratch_page = alloc_scratch_page(ggtt->base.dev); if (IS_ERR(scratch_page)) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3079,42 +3004,49 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); } +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + free_scratch_page(vm->dev, vm->scratch_page); +} + static int gen8_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev)) { + size = gen8_get_total_gtt_size(snb_gmch_ctl); + } else if (IS_CHERRYVIEW(dev_priv)) { ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); - ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); + size = chv_get_total_gtt_size(snb_gmch_ctl); } else { ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); + size = gen8_get_total_gtt_size(snb_gmch_ctl); } - ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; + ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; - if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) chv_setup_private_ppat(dev_priv); else bdw_setup_private_ppat(dev_priv); - ret = ggtt_probe_common(dev, ggtt->size); - + ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; ggtt->base.insert_page = gen8_ggtt_insert_page; @@ -3126,57 +3058,65 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (IS_CHERRYVIEW(dev_priv)) ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; - return ret; + return ggtt_probe_common(ggtt, size); } static int gen6_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. */ - if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); return -ENXIO; } - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; - ret = ggtt_probe_common(dev, ggtt->size); + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; ggtt->base.clear_range = gen6_ggtt_clear_range; ggtt->base.insert_page = gen6_ggtt_insert_page; ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = gen6_gmch_remove; + + if (HAS_EDRAM(dev_priv)) + ggtt->base.pte_encode = iris_pte_encode; + else if (IS_HASWELL(dev_priv)) + ggtt->base.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(dev_priv)) + ggtt->base.pte_encode = byt_pte_encode; + else if (INTEL_GEN(dev_priv) >= 7) + ggtt->base.pte_encode = ivb_pte_encode; + else + ggtt->base.pte_encode = snb_pte_encode; - return ret; + return ggtt_probe_common(ggtt, size); } -static void gen6_gmch_remove(struct i915_address_space *vm) +static void i915_gmch_remove(struct i915_address_space *vm) { - struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); - - iounmap(ggtt->gsm); - free_scratch_page(vm->dev, vm->scratch_page); + intel_gmch_remove(); } static int i915_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -3188,12 +3128,13 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, &ggtt->mappable_base, &ggtt->mappable_end); - ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = i915_gmch_remove; if (unlikely(ggtt->do_idle_maps)) DRM_INFO("applying Ironlake quirks for intel_iommu\n"); @@ -3201,65 +3142,40 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) return 0; } -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - /** - * i915_ggtt_init_hw - Initialize GGTT hardware - * @dev: DRM device + * i915_ggtt_probe_hw - Probe GGTT hardware location + * @dev_priv: i915 device */ -int i915_ggtt_init_hw(struct drm_device *dev) +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - if (INTEL_INFO(dev)->gen <= 5) { - ggtt->probe = i915_gmch_probe; - ggtt->base.cleanup = i915_gmch_remove; - } else if (INTEL_INFO(dev)->gen < 8) { - ggtt->probe = gen6_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - - if (HAS_EDRAM(dev)) - ggtt->base.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev)) - ggtt->base.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev)) - ggtt->base.pte_encode = byt_pte_encode; - else if (INTEL_INFO(dev)->gen >= 7) - ggtt->base.pte_encode = ivb_pte_encode; - else - ggtt->base.pte_encode = snb_pte_encode; - } else { - ggtt->probe = gen8_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - } + ggtt->base.dev = &dev_priv->drm; - ggtt->base.dev = dev; - ggtt->base.is_ggtt = true; - - ret = ggtt->probe(ggtt); + if (INTEL_GEN(dev_priv) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(dev_priv) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); if (ret) return ret; if ((ggtt->base.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" - "of address space! Found %lldM!\n", + " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); } - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. - */ - ret = i915_gem_init_stolen(dev); - if (ret) - goto out_gtt_cleanup; + if (ggtt->mappable_end > ggtt->base.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%llx, total=%llx\n", + ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = ggtt->base.total; + } /* GMADR is the PCI mmio aperture into the global GTT. */ DRM_INFO("Memory usable by graphics device = %lluM\n", @@ -3272,16 +3188,55 @@ int i915_ggtt_init_hw(struct drm_device *dev) #endif return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @dev_priv: i915 device + */ +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) +{ + struct i915_ggtt *ggtt = &dev_priv->ggtt; + int ret; + + INIT_LIST_HEAD(&dev_priv->vm_list); + + /* Subtract the guard page before address space initialization to + * shrink the range used by drm_mm. + */ + ggtt->base.total -= PAGE_SIZE; + i915_address_space_init(&ggtt->base, dev_priv); + ggtt->base.total += PAGE_SIZE; + if (!HAS_LLC(dev_priv)) + ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + + ggtt->mappable = + io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end); + if (!ggtt->mappable) { + ret = -EIO; + goto out_gtt_cleanup; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. + */ + ret = i915_gem_init_stolen(&dev_priv->drm); + if (ret) + goto out_gtt_cleanup; + + return 0; out_gtt_cleanup: ggtt->base.cleanup(&ggtt->base); - return ret; } -int i915_ggtt_enable_hw(struct drm_device *dev) +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) + if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) return -EIO; return 0; @@ -3331,7 +3286,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct i915_hw_ppgtt *ppgtt; - if (vm->is_ggtt) + if (i915_is_ggtt(vm)) ppgtt = dev_priv->mm.aliasing_ppgtt; else ppgtt = i915_vm_to_ppgtt(vm); @@ -3344,31 +3299,88 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) + WARN_ON(i915_vma_unbind(vma)); +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!i915_vma_is_closed(vma)); + + list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + list_del_init(&vma->obj_link); + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) + WARN_ON(i915_vma_unbind(vma)); +} + static struct i915_vma * __i915_gem_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view) + const struct i915_ggtt_view *view) { struct i915_vma *vma; + int i; + + GEM_BUG_ON(vm->closed); - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) + if (WARN_ON(i915_is_ggtt(vm) != !!view)) return ERR_PTR(-EINVAL); vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->vm_link); INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], i915_vma_retire); + list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; - vma->is_ggtt = i915_is_ggtt(vm); - - if (i915_is_ggtt(vm)) - vma->ggtt_view = *ggtt_view; - else + vma->size = obj->base.size; + + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } else { i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + } list_add_tail(&vma->obj_link, &obj->vma_list); @@ -3398,9 +3410,12 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + GEM_BUG_ON(!view); + if (!vma) vma = __i915_gem_vma_create(obj, &ggtt->base, view); + GEM_BUG_ON(i915_vma_is_closed(vma)); return vma; } @@ -3611,34 +3626,32 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int ret; u32 bind_flags; + u32 vma_flags; + int ret; if (WARN_ON(flags == 0)) return -EINVAL; bind_flags = 0; if (flags & PIN_GLOBAL) - bind_flags |= GLOBAL_BIND; + bind_flags |= I915_VMA_GLOBAL_BIND; if (flags & PIN_USER) - bind_flags |= LOCAL_BIND; + bind_flags |= I915_VMA_LOCAL_BIND; + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); if (flags & PIN_UPDATE) - bind_flags |= vma->bound; + bind_flags |= vma_flags; else - bind_flags &= ~vma->bound; - + bind_flags &= ~vma_flags; if (bind_flags == 0) return 0; - if (vma->bound == 0 && vma->vm->allocate_va_range) { - /* XXX: i915_vma_pin() will fix this +- hack */ - vma->pin_count++; + if (vma_flags == 0 && vma->vm->allocate_va_range) { trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->node.size); - vma->pin_count--; if (ret) return ret; } @@ -3647,44 +3660,20 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (ret) return ret; - vma->bound |= bind_flags; - + vma->flags |= bind_flags; return 0; } -/** - * i915_ggtt_view_size - Get the size of a GGTT view. - * @obj: Object the view is of. - * @view: The view in question. - * - * @return The size of the GGTT view in bytes. - */ -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - if (view->type == I915_GGTT_VIEW_NORMAL) { - return obj->base.size; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_PARTIAL) { - return view->params.partial.size << PAGE_SHIFT; - } else { - WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); - return obj->base.size; - } -} - void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; lockdep_assert_held(&vma->vm->dev->struct_mutex); if (WARN_ON(!vma->obj->map_and_fenceable)) - return ERR_PTR(-ENODEV); + return IO_ERR_PTR(-ENODEV); - GEM_BUG_ON(!vma->is_ggtt); - GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); ptr = vma->iomap; if (ptr == NULL) { @@ -3692,11 +3681,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) vma->node.start, vma->node.size); if (ptr == NULL) - return ERR_PTR(-ENOMEM); + return IO_ERR_PTR(-ENOMEM); vma->iomap = ptr; } - vma->pin_count++; + __i915_vma_pin(vma); return ptr; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index aa5f31d1c2ed..cc56206a1600 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,6 +36,8 @@ #include <linux/io-mapping.h> +#include "i915_gem_request.h" + struct drm_i915_file_private; typedef uint32_t gen6_pte_t; @@ -178,12 +180,32 @@ struct i915_vma { struct drm_i915_gem_object *obj; struct i915_address_space *vm; void __iomem *iomap; + u64 size; + + unsigned int flags; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) /** Flags and address space this VMA is bound to */ -#define GLOBAL_BIND (1<<0) -#define LOCAL_BIND (1<<1) - unsigned int bound : 4; - bool is_ggtt : 1; +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CLOSED BIT(9) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; /** * Support different GGTT views into the same object. @@ -208,20 +230,46 @@ struct i915_vma { struct hlist_node exec_node; unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; - - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. */ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf }; +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + struct i915_page_dma { struct page *page; union { @@ -272,11 +320,20 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; struct drm_device *dev; + /* Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aide resource tracking, and to + * assign blame. + */ + struct drm_i915_file_private *file; struct list_head global_link; u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - bool is_ggtt; + bool closed; struct i915_page_scratch *scratch_page; struct i915_page_table *scratch_pt; @@ -306,6 +363,13 @@ struct i915_address_space { */ struct list_head inactive_list; + /** + * List of vma that have been unbound. + * + * A reference is not held on the buffer while on this list. + */ + struct list_head unbound_list; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -338,7 +402,7 @@ struct i915_address_space { u32 flags); }; -#define i915_is_ggtt(V) ((V)->is_ggtt) +#define i915_is_ggtt(V) (!(V)->file) /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal @@ -354,7 +418,6 @@ struct i915_ggtt { size_t stolen_usable_size; /* Total size minus BIOS reserved */ size_t stolen_reserved_base; size_t stolen_reserved_size; - size_t size; /* Total size of Global GTT */ u64 mappable_end; /* End offset that we can CPU map */ struct io_mapping *mappable; /* Mapping to our CPU mappable region */ phys_addr_t mappable_base; /* PA of our GMADR */ @@ -365,8 +428,6 @@ struct i915_ggtt { bool do_idle_maps; int mtrr; - - int (*probe)(struct i915_ggtt *ggtt); }; struct i915_hw_ppgtt { @@ -380,8 +441,6 @@ struct i915_hw_ppgtt { struct i915_page_directory pd; /* GEN6-7 */ }; - struct drm_i915_file_private *file_priv; - gen6_pte_t __iomem *pd_addr; int (*enable)(struct i915_hw_ppgtt *ppgtt); @@ -521,14 +580,15 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } -int i915_ggtt_init_hw(struct drm_device *dev); -int i915_ggtt_enable_hw(struct drm_device *dev); -void i915_gem_init_ggtt(struct drm_device *dev); -void i915_ggtt_cleanup_hw(struct drm_device *dev); +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv); +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); -struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, +struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct drm_i915_file_private *fpriv); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { @@ -562,9 +622,66 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a, return true; } -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); +/* Flags used by pin/bind&friends. */ +#define PIN_NONBLOCK BIT(0) +#define PIN_MAPPABLE BIT(1) +#define PIN_ZONE_4G BIT(2) + +#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT(8) + +#define PIN_HIGH BIT(9) +#define PIN_OFFSET_BIAS BIT(10) +#define PIN_OFFSET_FIXED BIT(11) +#define PIN_OFFSET_MASK (~4095) + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. + */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); +} + +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_PIN_MASK; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->flags++; + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->flags--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture @@ -580,6 +697,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj, * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) /** * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap @@ -593,9 +711,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); static inline void i915_vma_unpin_iomap(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->pin_count == 0); GEM_BUG_ON(vma->iomap == NULL); - vma->pin_count--; + i915_vma_unpin(vma); } #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f75bbd67a13a..57fd767a2d79 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,10 +28,18 @@ #include "i915_drv.h" #include "intel_renderstate.h" +struct render_state { + const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; + u64 ggtt_offset; + u32 aux_batch_size; + u32 aux_batch_offset; +}; + static const struct intel_renderstate_rodata * -render_state_get_rodata(const int gen) +render_state_get_rodata(const struct drm_i915_gem_request *req) { - switch (gen) { + switch (INTEL_GEN(req->i915)) { case 6: return &gen6_null_state; case 7: @@ -45,35 +53,6 @@ render_state_get_rodata(const int gen) return NULL; } -static int render_state_init(struct render_state *so, - struct drm_i915_private *dev_priv) -{ - int ret; - - so->gen = INTEL_GEN(dev_priv); - so->rodata = render_state_get_rodata(so->gen); - if (so->rodata == NULL) - return 0; - - if (so->rodata->batch_items * 4 > 4096) - return -EINVAL; - - so->obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); - - ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); - if (ret) - goto free_gem; - - so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj); - return 0; - -free_gem: - drm_gem_object_unreference(&so->obj->base); - return ret; -} - /* * Macro to add commands to auxiliary batch. * This macro only checks for page overflow before inserting the commands, @@ -96,6 +75,7 @@ static int render_state_setup(struct render_state *so) { struct drm_device *dev = so->obj->base.dev; const struct intel_renderstate_rodata *rodata = so->rodata; + const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; unsigned int i = 0, reloc_index = 0; struct page *page; u32 *d; @@ -114,7 +94,7 @@ static int render_state_setup(struct render_state *so) if (i * 4 == rodata->reloc[reloc_index]) { u64 r = s + so->ggtt_offset; s = lower_32_bits(r); - if (so->gen >= 8) { + if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) { ret = -EINVAL; @@ -192,67 +172,55 @@ err_out: #undef OUT_BATCH -void i915_gem_render_state_fini(struct render_state *so) -{ - i915_gem_object_ggtt_unpin(so->obj); - drm_gem_object_unreference(&so->obj->base); -} - -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so) +int i915_gem_render_state_init(struct drm_i915_gem_request *req) { + struct render_state so; int ret; - if (WARN_ON(engine->id != RCS)) + if (WARN_ON(req->engine->id != RCS)) return -ENOENT; - ret = render_state_init(so, engine->i915); - if (ret) - return ret; - - if (so->rodata == NULL) + so.rodata = render_state_get_rodata(req); + if (!so.rodata) return 0; - ret = render_state_setup(so); - if (ret) { - i915_gem_render_state_fini(so); - return ret; - } + if (so.rodata->batch_items * 4 > 4096) + return -EINVAL; - return 0; -} + so.obj = i915_gem_object_create(&req->i915->drm, 4096); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); -int i915_gem_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); + ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0); if (ret) - return ret; + goto err_obj; - if (so.rodata == NULL) - return 0; + so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj); + + ret = render_state_setup(&so); + if (ret) + goto err_unpin; - ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset, - so.rodata->batch_items * 4, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, so.ggtt_offset, + so.rodata->batch_items * 4, + I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; if (so.aux_batch_size > 8) { - ret = req->engine->dispatch_execbuffer(req, - (so.ggtt_offset + - so.aux_batch_offset), - so.aux_batch_size, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, + (so.ggtt_offset + + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; } - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0); +err_unpin: + i915_gem_object_ggtt_unpin(so.obj); +err_obj: + i915_gem_object_put(so.obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 6aaa3a10a630..c44fca8599bb 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,24 +26,6 @@ #include <linux/types.h> -struct intel_renderstate_rodata { - const u32 *reloc; - const u32 *batch; - const u32 batch_items; -}; - -struct render_state { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - u64 ggtt_offset; - int gen; - u32 aux_batch_size; - u32 aux_batch_offset; -}; - int i915_gem_render_state_init(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct render_state *so); -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c new file mode 100644 index 000000000000..6a1661643d3d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -0,0 +1,767 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prefetch.h> + +#include "i915_drv.h" + +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct fence *fence) +{ + /* Timelines are bound by eviction to a VM. However, since + * we only have a global seqno at the moment, we only have + * a single timeline. Note that each timeline will have + * multiple execution contexts (fence contexts) as we allow + * engines within a single timeline to execute in parallel. + */ + return "global"; +} + +static bool i915_fence_signaled(struct fence *fence) +{ + return i915_gem_request_completed(to_request(fence)); +} + +static bool i915_fence_enable_signaling(struct fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + intel_engine_enable_signaling(to_request(fence)); + return true; +} + +static signed long i915_fence_wait(struct fence *fence, + bool interruptible, + signed long timeout_jiffies) +{ + s64 timeout_ns, *timeout; + int ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + timeout_ns = jiffies_to_nsecs(timeout_jiffies); + timeout = &timeout_ns; + } else { + timeout = NULL; + } + + ret = i915_wait_request(to_request(fence), + interruptible, timeout, + NO_WAITBOOST); + if (ret == -ETIME) + return 0; + + if (ret < 0) + return ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) + timeout_jiffies = nsecs_to_jiffies(timeout_ns); + + return timeout_jiffies; +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static void i915_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + snprintf(str, size, "%u", + intel_engine_get_seqno(to_request(fence)->engine)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_request(fence); + + kmem_cache_free(req->i915->requests, req); +} + +const struct fence_ops i915_fence_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, + .fence_value_str = i915_fence_value_str, + .timeline_value_str = i915_fence_timeline_value_str, +}; + +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->i915; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + req->pid = get_pid(task_pid(current)); + + return 0; +} + +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + list_del(&request->client_list); + request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); + + put_pid(request->pid); + request->pid = NULL; +} + +void i915_gem_retire_noop(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + /* Space left intentionally blank */ +} + +static void i915_gem_request_retire(struct drm_i915_gem_request *request) +{ + struct i915_gem_active *active, *next; + + trace_i915_gem_request_retire(request); + list_del(&request->link); + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + list_del(&request->ring_link); + request->ring->last_retired_head = request->postfix; + + /* Walk through the active list, calling retire on each. This allows + * objects to track their GPU activity and mark themselves as idle + * when their *last* active request is completed (updating state + * tracking lists for eviction, active references for GEM, etc). + * + * As the ->retire() may free the node, we decouple it first and + * pass along the auxiliary information (to avoid dereferencing + * the node after the callback). + */ + list_for_each_entry_safe(active, next, &request->active_list, link) { + /* In microbenchmarks or focusing upon time inside the kernel, + * we may spend an inordinate amount of time simply handling + * the retirement of requests and processing their callbacks. + * Of which, this loop itself is particularly hot due to the + * cache misses when jumping around the list of i915_gem_active. + * So we try to keep this loop as streamlined as possible and + * also prefetch the next i915_gem_active to try and hide + * the likely cache miss. + */ + prefetchw(next); + + INIT_LIST_HEAD(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + } + + i915_gem_request_remove_from_client(request); + + if (request->previous_context) { + if (i915.enable_execlists) + intel_lr_context_unpin(request->previous_context, + request->engine); + } + + i915_gem_context_put(request->ctx); + i915_gem_request_put(request); +} + +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *engine = req->engine; + struct drm_i915_gem_request *tmp; + + lockdep_assert_held(&req->i915->drm.struct_mutex); + GEM_BUG_ON(list_empty(&req->link)); + + do { + tmp = list_first_entry(&engine->request_list, + typeof(*tmp), link); + + i915_gem_request_retire(tmp); + } while (tmp != req); +} + +static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) +{ + if (__i915_terminally_wedged(reset_counter)) + return -EIO; + + if (__i915_reset_in_progress(reset_counter)) { + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. + */ + if (!interruptible) + return -EIO; + + return -EAGAIN; + } + + return 0; +} + +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) +{ + struct intel_engine_cs *engine; + int ret; + + /* Carefully retire all requests without writing to the rings */ + for_each_engine(engine, dev_priv) { + ret = intel_engine_idle(engine, true); + if (ret) + return ret; + } + i915_gem_retire_requests(dev_priv); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { + while (intel_kick_waiters(dev_priv) || + intel_kick_signalers(dev_priv)) + yield(); + } + + /* Finally reset hw state */ + for_each_engine(engine, dev_priv) + intel_engine_init_seqno(engine, seqno); + + return 0; +} + +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int ret; + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + ret = i915_gem_init_seqno(dev_priv, seqno - 1); + if (ret) + return ret; + + dev_priv->next_seqno = seqno; + return 0; +} + +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) +{ + /* reserve 0 for non-seqno */ + if (unlikely(dev_priv->next_seqno == 0)) { + int ret; + + ret = i915_gem_init_seqno(dev_priv, 0); + if (ret) + return ret; + + dev_priv->next_seqno = 1; + } + + *seqno = dev_priv->next_seqno++; + return 0; +} + +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_private *dev_priv = engine->i915; + unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); + struct drm_i915_gem_request *req; + u32 seqno; + int ret; + + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex + * and restart. + */ + ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); + if (ret) + return ERR_PTR(ret); + + /* Move the oldest request to the slab-cache (if not in use!) */ + req = list_first_entry_or_null(&engine->request_list, + typeof(*req), link); + if (req && i915_gem_request_completed(req)) + i915_gem_request_retire(req); + + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + ret = i915_gem_get_seqno(dev_priv, &seqno); + if (ret) + goto err; + + spin_lock_init(&req->lock); + fence_init(&req->fence, + &i915_fence_ops, + &req->lock, + engine->fence_context, + seqno); + + INIT_LIST_HEAD(&req->active_list); + req->i915 = dev_priv; + req->engine = engine; + req->ctx = i915_gem_context_get(ctx); + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + + if (i915.enable_execlists) + ret = intel_logical_ring_alloc_request_extras(req); + else + ret = intel_ring_alloc_request_extras(req); + if (ret) + goto err_ctx; + + return req; + +err_ctx: + i915_gem_context_put(ctx); +err: + kmem_cache_free(dev_priv->requests, req); + return ERR_PTR(ret); +} + +static void i915_gem_mark_busy(const struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->gt.active_engines |= intel_engine_flag(engine); + if (dev_priv->gt.awake) + return; + + intel_runtime_pm_get_noresume(dev_priv); + dev_priv->gt.awake = true; + + intel_enable_gt_powersave(dev_priv); + i915_update_gfx_val(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct drm_i915_gem_request *request, + struct drm_i915_gem_object *obj, + bool flush_caches) +{ + struct intel_engine_cs *engine; + struct intel_ring *ring; + u32 request_start; + u32 reserved_tail; + int ret; + + if (WARN_ON(!request)) + return; + + engine = request->engine; + ring = request->ring; + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + request_start = ring->tail; + reserved_tail = request->reserved_space; + request->reserved_space = 0; + + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. Hence we need to fix + * things up similar to emitting the lazy request. The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (flush_caches) { + ret = engine->emit_flush(request, EMIT_FLUSH); + + /* Not allowed to fail! */ + WARN(ret, "engine->emit_flush() failed: %d!\n", ret); + } + + trace_i915_gem_request_add(request); + + request->head = request_start; + + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + request->batch_obj = obj; + + /* Seal the request and mark it as pending execution. Note that + * we may inspect this state, without holding any locks, during + * hangcheck. Hence we apply the barrier to ensure that we do not + * see a more recent value in the hws than we are tracking. + */ + request->emitted_jiffies = jiffies; + request->previous_seqno = engine->last_submitted_seqno; + engine->last_submitted_seqno = request->fence.seqno; + i915_gem_active_set(&engine->last_request, request); + list_add_tail(&request->link, &engine->request_list); + list_add_tail(&request->ring_link, &ring->request_list); + + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + request->postfix = ring->tail; + + /* Not allowed to fail! */ + ret = engine->emit_request(request); + WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret); + + /* Sanity check that the reserved size was large enough. */ + ret = ring->tail - request_start; + if (ret < 0) + ret += ring->size; + WARN_ONCE(ret > reserved_tail, + "Not enough space reserved (%d bytes) " + "for adding the request (%d bytes)\n", + reserved_tail, ret); + + i915_gem_mark_busy(engine); + engine->submit_request(request); +} + +static unsigned long local_clock_us(unsigned int *cpu) +{ + unsigned long t; + + /* Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. + * + * Note that local_clock() is only defined wrt to the current CPU; + * the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +} + +static bool busywait_stop(unsigned long timeout, unsigned int cpu) +{ + unsigned int this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) +{ + unsigned int cpu; + + /* When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quick as possible. However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. + */ + + timeout_us += local_clock_us(&cpu); + do { + if (i915_gem_request_completed(req)) + return true; + + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout_us, cpu)) + break; + + cpu_relax_lowlatency(); + } while (!need_resched()); + + return false; +} + +/** + * i915_wait_request - wait until execution of request has finished + * @req: duh! + * @interruptible: do an interruptible wait (normally yes) + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * @rps: client to charge for RPS boosting + * + * Note: It is of utmost importance that the passed in seqno and reset_counter + * values have been read by the caller in an smp safe manner. Where read-side + * locks are involved, it is sufficient to read the reset_counter before + * unlocking the lock that protects the seqno. For lockless tricks, the + * reset_counter _must_ be read before, and an appropriate smp_rmb must be + * inserted. + * + * Returns 0 if the request was found within the alloted time. Else returns the + * errno with remaining time filled in timeout argument. + */ +int i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) +{ + int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); + struct intel_wait wait; + unsigned long timeout_remain; + int ret = 0; + + might_sleep(); + + if (i915_gem_request_completed(req)) + return 0; + + timeout_remain = MAX_SCHEDULE_TIMEOUT; + if (timeout) { + if (WARN_ON(*timeout < 0)) + return -EINVAL; + + if (*timeout == 0) + return -ETIME; + + /* Record current time in case interrupted, or wedged */ + timeout_remain = nsecs_to_jiffies_timeout(*timeout); + *timeout += ktime_get_raw_ns(); + } + + trace_i915_gem_request_wait_begin(req); + + /* This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. + */ + if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6) + gen6_rps_boost(req->i915, rps, req->emitted_jiffies); + + /* Optimistic short spin before touching IRQs */ + if (i915_spin_request(req, state, 5)) + goto complete; + + set_current_state(state); + add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_wait_init(&wait, req->fence.seqno); + if (intel_engine_add_wait(req->engine, &wait)) + /* In order to check that we haven't missed the interrupt + * as we enabled it, we need to kick ourselves to do a + * coherent check on the seqno before we sleep. + */ + goto wakeup; + + for (;;) { + if (signal_pending_state(state, current)) { + ret = -ERESTARTSYS; + break; + } + + timeout_remain = io_schedule_timeout(timeout_remain); + if (timeout_remain == 0) { + ret = -ETIME; + break; + } + + if (intel_wait_complete(&wait)) + break; + + set_current_state(state); + +wakeup: + /* Carefully check if the request is complete, giving time + * for the seqno to be visible following the interrupt. + * We also have to check in case we are kicked by the GPU + * reset in order to drop the struct_mutex. + */ + if (__i915_request_irq_complete(req)) + break; + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break; + } + remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_engine_remove_wait(req->engine, &wait); + __set_current_state(TASK_RUNNING); +complete: + trace_i915_gem_request_wait_end(req); + + if (timeout) { + *timeout -= ktime_get_raw_ns(); + if (*timeout < 0) + *timeout = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regrssion from the timespec->ktime conversion. + */ + if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) + *timeout = 0; + } + + if (IS_RPS_USER(rps) && + req->fence.seqno == req->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. + * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. + */ + spin_lock(&req->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&req->i915->rps.client_lock); + } + + return ret; +} + +static void engine_retire_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *request, *next; + + list_for_each_entry_safe(request, next, &engine->request_list, link) { + if (!i915_gem_request_completed(request)) + break; + + i915_gem_request_retire(request); + } +} + +void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (dev_priv->gt.active_engines == 0) + return; + + GEM_BUG_ON(!dev_priv->gt.awake); + + for_each_engine(engine, dev_priv) { + engine_retire_requests(engine); + if (!intel_engine_is_active(engine)) + dev_priv->gt.active_engines &= ~intel_engine_flag(engine); + } + + if (dev_priv->gt.active_engines == 0) + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.idle_work, + msecs_to_jiffies(100)); +} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h new file mode 100644 index 000000000000..3496e28785e7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -0,0 +1,676 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_GEM_REQUEST_H +#define I915_GEM_REQUEST_H + +#include <linux/fence.h> + +#include "i915_gem.h" + +struct intel_wait { + struct rb_node node; + struct task_struct *tsk; + u32 seqno; +}; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable sequence + * number comparisons on buffer last_read|write_seqno. It also allows an + * emission time to be associated with the request for tracking how far ahead + * of the GPU the submission is. + * + * The requests are reference counted. + */ +struct drm_i915_gem_request { + struct fence fence; + spinlock_t lock; + + /** On Which ring this request was generated */ + struct drm_i915_private *i915; + + /** + * Context and ring buffer related to this request + * Contexts are refcounted, so when this request is associated with a + * context, we must increment the context's refcount, to guarantee that + * it persists while any request is linked to it. Requests themselves + * are also refcounted, so the request will only be freed when the last + * reference to it is dismissed, and the code in + * i915_gem_request_free() will then decrement the refcount on the + * context. + */ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + struct intel_ring *ring; + struct intel_signal_node signaling; + + /** GEM sequence number associated with the previous request, + * when the HWS breadcrumb is equal to this the GPU is processing + * this request. + */ + u32 previous_seqno; + + /** Position in the ringbuffer of the start of the request */ + u32 head; + + /** + * Position in the ringbuffer of the start of the postfix. + * This is required to calculate the maximum available ringbuffer + * space without overwriting the postfix. + */ + u32 postfix; + + /** Position in the ringbuffer of the end of the whole request */ + u32 tail; + + /** Preallocate space in the ringbuffer for the emitting the request */ + u32 reserved_space; + + /** + * Context related to the previous request. + * As the contexts are accessed by the hardware until the switch is + * completed to a new context, the hardware may still be writing + * to the context object after the breadcrumb is visible. We must + * not unpin/unbind/prune that object whilst still active and so + * we keep the previous context pinned until the following (this) + * request is retired. + */ + struct i915_gem_context *previous_context; + + /** Batch buffer related to this request if any (used for + * error state dump only). + */ + struct drm_i915_gem_object *batch_obj; + struct list_head active_list; + + /** Time at which this request was emitted, in jiffies. */ + unsigned long emitted_jiffies; + + /** engine->request_list entry for this request */ + struct list_head link; + + /** ring->request_list entry for this request */ + struct list_head ring_link; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_list; + + /** process identifier submitting this request */ + struct pid *pid; + + /** + * The ELSP only accepts two elements at a time, so we queue + * context/tail pairs on a given queue (ring->execlist_queue) until the + * hardware is available. The queue serves a double purpose: we also use + * it to keep track of the up to 2 contexts currently in the hardware + * (usually one in execution and the other queued up by the GPU): We + * only remove elements from the head of the queue when the hardware + * informs us that an element has been completed. + * + * All accesses to the queue are mediated by a spinlock + * (ring->execlist_lock). + */ + + /** Execlist link in the submission queue.*/ + struct list_head execlist_link; + + /** Execlists no. of times this request has been sent to the ELSP */ + int elsp_submitted; + + /** Execlists context hardware id. */ + unsigned int ctx_hw_id; +}; + +extern const struct fence_ops i915_fence_ops; + +static inline bool fence_is_i915(struct fence *fence) +{ + return fence->ops == &i915_fence_ops; +} + +struct drm_i915_gem_request * __must_check +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file); +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); + +static inline u32 +i915_gem_request_get_seqno(struct drm_i915_gem_request *req) +{ + return req ? req->fence.seqno : 0; +} + +static inline struct intel_engine_cs * +i915_gem_request_get_engine(struct drm_i915_gem_request *req) +{ + return req ? req->engine : NULL; +} + +static inline struct drm_i915_gem_request * +to_request(struct fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); + GEM_BUG_ON(fence && !fence_is_i915(fence)); + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static inline struct drm_i915_gem_request * +i915_gem_request_get(struct drm_i915_gem_request *req) +{ + return to_request(fence_get(&req->fence)); +} + +static inline struct drm_i915_gem_request * +i915_gem_request_get_rcu(struct drm_i915_gem_request *req) +{ + return to_request(fence_get_rcu(&req->fence)); +} + +static inline void +i915_gem_request_put(struct drm_i915_gem_request *req) +{ + fence_put(&req->fence); +} + +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, + struct drm_i915_gem_request *src) +{ + if (src) + i915_gem_request_get(src); + + if (*pdst) + i915_gem_request_put(*pdst); + + *pdst = src; +} + +void __i915_add_request(struct drm_i915_gem_request *req, + struct drm_i915_gem_object *batch_obj, + bool flush_caches); +#define i915_add_request(req) \ + __i915_add_request(req, NULL, true) +#define i915_add_request_no_flush(req) \ + __i915_add_request(req, NULL, false) + +struct intel_rps_client; +#define NO_WAITBOOST ERR_PTR(-1) +#define IS_RPS_CLIENT(p) (!IS_ERR(p)) +#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) + +int i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) + __attribute__((nonnull(1))); + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is later than seq2. + */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +i915_gem_request_started(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->previous_seqno); +} + +static inline bool +i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->fence.seqno); +} + +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + +/* We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_gem_active is updated with i915_gem_active_set() to track the + * most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active; + +typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, + struct drm_i915_gem_request *); + +struct i915_gem_active { + struct drm_i915_gem_request __rcu *request; + struct list_head link; + i915_gem_retire_fn retire; +}; + +void i915_gem_retire_noop(struct i915_gem_active *, + struct drm_i915_gem_request *request); + +/** + * init_request_active - prepares the activity tracker for use + * @active - the active tracker + * @func - a callback when then the tracker is retired (becomes idle), + * can be NULL + * + * init_request_active() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @func is invoked. + */ +static inline void +init_request_active(struct i915_gem_active *active, + i915_gem_retire_fn retire) +{ + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_gem_retire_noop; +} + +/** + * i915_gem_active_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * i915_gem_active_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +static inline struct drm_i915_gem_request * +__i915_gem_active_peek(const struct i915_gem_active *active) +{ + /* Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). + * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_gem_active_peek - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + + request = rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); + if (!request || i915_gem_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_peek_rcu - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek_rcu() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, and inspection of the request is only valid under + * the RCU lock. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_peek_rcu(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_get - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) +{ + return i915_gem_request_get(i915_gem_active_peek(active, mutex)); +} + +/** + * __i915_gem_active_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold the RCU read lock, but + * the returned pointer is safe to use outside of RCU. + */ +static inline struct drm_i915_gem_request * +__i915_gem_active_get_rcu(const struct i915_gem_active *active) +{ + /* Performing a lockless retrieval of the active request is super + * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * req = active.request + * retire(req) -> free(req); + * (req is now first on the slab freelist) + * active.request = NULL + * + * req = new submission on a new object + * ref(req) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. + * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + */ + do { + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return NULL; + + request = i915_gem_request_get_rcu(request); + + /* What stops the following rcu_access_pointer() from occurring + * before the above i915_gem_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_gem_request_get_rcu(), see fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_gem_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_gem_request_put(request); + } while (1); +} + +/** + * i915_gem_active_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_gem_request_put(). + */ +static inline struct drm_i915_gem_request * +i915_gem_active_get_unlocked(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + rcu_read_lock(); + request = __i915_gem_active_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_gem_active_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_gem_active_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_gem_active_isset(const struct i915_gem_active *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_gem_active_is_idle - report whether the active tracker is idle + * @active - the active tracker + * + * i915_gem_active_is_idle() returns true if the active tracker is currently + * unassigned or if the request is complete (but not yet retired). Requires + * the caller to hold struct_mutex (but that can be relaxed if desired). + */ +static inline bool +i915_gem_active_is_idle(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return !i915_gem_active_peek(active, mutex); +} + +/** + * i915_gem_active_wait - waits until the request is completed + * @active - the active request on which to wait + * + * i915_gem_active_wait() waits until the request is completed before + * returning. Note that it does not guarantee that the request is + * retired first, see i915_gem_active_retire(). + * + * i915_gem_active_wait() returns immediately if the active + * request is already complete. + */ +static inline int __must_check +i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_peek(active, mutex); + if (!request) + return 0; + + return i915_wait_request(request, true, NULL, NULL); +} + +/** + * i915_gem_active_wait_unlocked - waits until the request is completed + * @active - the active request on which to wait + * @interruptible - whether the wait can be woken by a userspace signal + * @timeout - how long to wait at most + * @rps - userspace client to charge for a waitboost + * + * i915_gem_active_wait_unlocked() waits until the request is completed before + * returning, without requiring any locks to be held. Note that it does not + * retire any requests before returning. + * + * This function relies on RCU in order to acquire the reference to the active + * request without holding any locks. See __i915_gem_active_get_rcu() for the + * glory details on how that is managed. Once the reference is acquired, we + * can then wait upon the request, and afterwards release our reference, + * free of any locking. + * + * This function wraps i915_wait_request(), see it for the full details on + * the arguments. + * + * Returns 0 if successful, or a negative error code. + */ +static inline int +i915_gem_active_wait_unlocked(const struct i915_gem_active *active, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) +{ + struct drm_i915_gem_request *request; + int ret = 0; + + request = i915_gem_active_get_unlocked(active); + if (request) { + ret = i915_wait_request(request, interruptible, timeout, rps); + i915_gem_request_put(request); + } + + return ret; +} + +/** + * i915_gem_active_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_gem_active_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_gem_active_retire(struct i915_gem_active *active, + struct mutex *mutex) +{ + struct drm_i915_gem_request *request; + int ret; + + request = rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); + if (!request) + return 0; + + ret = i915_wait_request(request, true, NULL, NULL); + if (ret) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} + +/* Convenience functions for peeking at state inside active's request whilst + * guarded by the struct_mutex. + */ + +static inline uint32_t +i915_gem_active_get_seqno(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex)); +} + +static inline struct intel_engine_cs * +i915_gem_active_get_engine(const struct i915_gem_active *active, + struct mutex *mutex) +{ + return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex)); +} + +#define for_each_active(mask, idx) \ + for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) + +#endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6f10b421487b..b80802b35353 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -48,19 +48,15 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } -static int num_vma_bound(struct drm_i915_gem_object *obj) +static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - int count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (drm_mm_node_allocated(&vma->node)) - count++; - if (vma->pin_count) - count++; - } + list_for_each_entry(vma, &obj->vma_list, obj_link) + if (i915_vma_is_pinned(vma)) + return true; - return count; + return false; } static bool swap_available(void) @@ -82,7 +78,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * to the GPU, simply unbinding from the GPU is not going to succeed * in releasing our pin count on the pages themselves. */ - if (obj->pages_pin_count != num_vma_bound(obj)) + if (obj->pages_pin_count > obj->bind_count) + return false; + + if (any_vma_pinned(obj)) return false; /* We can only return physical pages to the system if we can either @@ -163,17 +162,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ for (phase = phases; phase->list; phase++) { struct list_head still_in_list; + struct drm_i915_gem_object *obj; if ((flags & phase->bit) == 0) continue; INIT_LIST_HEAD(&still_in_list); - while (count < target && !list_empty(phase->list)) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma, *v; - - obj = list_first_entry(phase->list, - typeof(*obj), global_list); + while (count < target && + (obj = list_first_entry_or_null(phase->list, + typeof(*obj), + global_list))) { list_move_tail(&obj->global_list, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && @@ -184,24 +182,21 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, !is_vmalloc_addr(obj->mapping)) continue; - if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active) + if ((flags & I915_SHRINK_ACTIVE) == 0 && + i915_gem_object_is_active(obj)) continue; if (!can_release_pages(obj)) continue; - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); /* For the unbound phase, this should be a no-op! */ - list_for_each_entry_safe(vma, v, - &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - + i915_gem_object_unbind(obj); if (i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } list_splice(&still_in_list, phase->list); } @@ -210,6 +205,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); return count; } @@ -230,10 +227,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { - return i915_gem_shrink(dev_priv, -1UL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + unsigned long freed; + + freed = i915_gem_shrink(dev_priv, -1UL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ + + return freed; } static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) @@ -242,9 +244,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) if (!mutex_is_locked_by(&dev->struct_mutex, current)) return false; - if (to_i915(dev)->mm.shrinker_no_lock_stealing) - return false; - *unlock = false; } else *unlock = true; @@ -273,7 +272,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!obj->active && can_release_pages(obj)) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } @@ -321,17 +320,22 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu, int timeout_ms) { - unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1; + unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); + + do { + if (i915_gem_wait_for_idle(dev_priv, false) == 0 && + i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + break; - while (!i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) { schedule_timeout_killable(1); if (fatal_signal_pending(current)) return false; - if (--timeout == 0) { + + if (time_after(jiffies, timeout)) { pr_err("Unable to lock GPU to purge memory.\n"); return false; } - } + } while (1); slu->was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; @@ -410,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, false); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 66be299a1486..13279610eeec 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -698,24 +698,24 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, */ vma->node.start = gtt_offset; vma->node.size = size; - if (drm_mm_initialized(&ggtt->base.mm)) { - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - goto err; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); + if (ret) { + DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); + goto err; } + vma->flags |= I915_VMA_GLOBAL_BIND; + __i915_vma_set_map_and_fenceable(vma); + list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); + obj->bind_count++; + list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); i915_gem_object_pin_pages(obj); return obj; err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8030199731db..f4b984de83b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; + if (tiling_mode > I915_TILING_LAST) + return false; + if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; @@ -117,15 +120,16 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) static bool i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) { + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); u32 size; if (tiling_mode == I915_TILING_NONE) return true; - if (INTEL_INFO(obj->base.dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) return true; - if (IS_GEN3(obj->base.dev)) { + if (IS_GEN3(dev_priv)) { if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) return false; } else { @@ -133,7 +137,7 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) return false; } - size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode); + size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode); if (i915_gem_obj_ggtt_size(obj) != size) return false; @@ -166,13 +170,16 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int ret = 0; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return -EINVAL; } @@ -213,8 +220,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } } - if (args->tiling_mode != obj->tiling_mode || - args->stride != obj->stride) { + if (args->tiling_mode != i915_gem_object_get_tiling(obj) || + args->stride != i915_gem_object_get_stride(obj)) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but @@ -237,24 +244,25 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (args->tiling_mode == I915_TILING_NONE) i915_gem_object_unpin_pages(obj); - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) i915_gem_object_pin_pages(obj); } obj->fence_dirty = - obj->last_fenced_req || + !i915_gem_active_is_idle(&obj->last_fence, + &dev->struct_mutex) || obj->fence_reg != I915_FENCE_REG_NONE; - obj->tiling_mode = args->tiling_mode; - obj->stride = args->stride; + obj->tiling_and_stride = + args->stride | args->tiling_mode; /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); } } /* we have to maintain this existing ABI... */ - args->stride = obj->stride; - args->tiling_mode = obj->tiling_mode; + args->stride = i915_gem_object_get_stride(obj); + args->tiling_mode = i915_gem_object_get_tiling(obj); /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { @@ -268,7 +276,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); intel_runtime_pm_put(dev_priv); @@ -297,14 +305,12 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; - mutex_lock(&dev->struct_mutex); - - args->tiling_mode = obj->tiling_mode; - switch (obj->tiling_mode) { + args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK; + switch (args->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; break; @@ -328,8 +334,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - + i915_gem_object_put_unlocked(obj); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2314c88323e3..57218cca7e05 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -63,33 +63,12 @@ struct i915_mmu_object { static void wait_rendering(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int i, n; - - if (!obj->active) - return; - - n = 0; - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read_req[i]; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_reference(req); - } - - mutex_unlock(&dev->struct_mutex); + unsigned long active = __I915_BO_ACTIVE(obj); + int idx; - for (i = 0; i < n; i++) - __i915_wait_request(requests[i], false, NULL, NULL); - - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_gem_request_unreference(requests[i]); + for_each_active(active, idx) + i915_gem_active_wait_unlocked(&obj->last_read[idx], + false, NULL, NULL); } static void cancel_userptr(struct work_struct *work) @@ -98,28 +77,19 @@ static void cancel_userptr(struct work_struct *work) struct drm_i915_gem_object *obj = mo->obj; struct drm_device *dev = obj->base.dev; + wait_rendering(obj); + mutex_lock(&dev->struct_mutex); /* Cancel any active worker and force us to re-evaluate gup */ obj->userptr.work = NULL; if (obj->pages != NULL) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_vma *vma, *tmp; - bool was_interruptible; - - wait_rendering(obj); - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - list_for_each_entry_safe(vma, tmp, &obj->vma_list, obj_link) - WARN_ON(i915_vma_unbind(vma)); + /* We are inside a kthread context and can't be interrupted */ + WARN_ON(i915_gem_object_unbind(obj)); WARN_ON(i915_gem_object_put_pages(obj)); - - dev_priv->mm.interruptible = was_interruptible; } - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); } @@ -577,7 +547,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } obj->userptr.workers--; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); release_pages(pvec, pinned, 0); @@ -622,8 +592,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, obj->userptr.work = &work->work; obj->userptr.workers++; - work->obj = obj; - drm_gem_object_reference(&obj->base); + work->obj = i915_gem_object_get(obj); work->task = current; get_task_struct(work->task); @@ -846,7 +815,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9d73d2216adc..eecb87063c88 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,9 +30,9 @@ #include <generated/utsrelease.h> #include "i915_drv.h" -static const char *ring_str(int ring) +static const char *engine_str(int engine) { - switch (ring) { + switch (engine) { case RCS: return "render"; case VCS: return "bsd"; case BCS: return "blt"; @@ -207,8 +207,8 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->ring != -1 ? " " : ""); - err_puts(m, ring_str(err->ring)); + err_puts(m, err->engine != -1 ? " " : ""); + err_puts(m, engine_str(err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -221,7 +221,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } } -static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) +static const char *hangcheck_action_to_str(enum intel_engine_hangcheck_action a) { switch (a) { case HANGCHECK_IDLE: @@ -239,70 +239,65 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) return "unknown"; } -static void i915_ring_error_state(struct drm_i915_error_state_buf *m, - struct drm_device *dev, - struct drm_i915_error_state *error, - int ring_idx) +static void error_print_engine(struct drm_i915_error_state_buf *m, + struct drm_i915_error_engine *ee) { - struct drm_i915_error_ring *ring = &error->ring[ring_idx]; - - if (!ring->valid) - return; - - err_printf(m, "%s command stream:\n", ring_str(ring_idx)); - err_printf(m, " START: 0x%08x\n", ring->start); - err_printf(m, " HEAD: 0x%08x\n", ring->head); - err_printf(m, " TAIL: 0x%08x\n", ring->tail); - err_printf(m, " CTL: 0x%08x\n", ring->ctl); - err_printf(m, " HWS: 0x%08x\n", ring->hws); - err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd); - err_printf(m, " IPEIR: 0x%08x\n", ring->ipeir); - err_printf(m, " IPEHR: 0x%08x\n", ring->ipehr); - err_printf(m, " INSTDONE: 0x%08x\n", ring->instdone); - if (INTEL_INFO(dev)->gen >= 4) { - err_printf(m, " BBADDR: 0x%08x %08x\n", (u32)(ring->bbaddr>>32), (u32)ring->bbaddr); - err_printf(m, " BB_STATE: 0x%08x\n", ring->bbstate); - err_printf(m, " INSTPS: 0x%08x\n", ring->instps); + err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, " START: 0x%08x\n", ee->start); + err_printf(m, " HEAD: 0x%08x\n", ee->head); + err_printf(m, " TAIL: 0x%08x\n", ee->tail); + err_printf(m, " CTL: 0x%08x\n", ee->ctl); + err_printf(m, " HWS: 0x%08x\n", ee->hws); + err_printf(m, " ACTHD: 0x%08x %08x\n", + (u32)(ee->acthd>>32), (u32)ee->acthd); + err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); + err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); + err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone); + if (INTEL_GEN(m->i915) >= 4) { + err_printf(m, " BBADDR: 0x%08x %08x\n", + (u32)(ee->bbaddr>>32), (u32)ee->bbaddr); + err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate); + err_printf(m, " INSTPS: 0x%08x\n", ee->instps); } - err_printf(m, " INSTPM: 0x%08x\n", ring->instpm); - err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ring->faddr), - lower_32_bits(ring->faddr)); - if (INTEL_INFO(dev)->gen >= 6) { - err_printf(m, " RC PSMI: 0x%08x\n", ring->rc_psmi); - err_printf(m, " FAULT_REG: 0x%08x\n", ring->fault_reg); + err_printf(m, " INSTPM: 0x%08x\n", ee->instpm); + err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr), + lower_32_bits(ee->faddr)); + if (INTEL_GEN(m->i915) >= 6) { + err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); + err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[0], - ring->semaphore_seqno[0]); + ee->semaphore_mboxes[0], + ee->semaphore_seqno[0]); err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[1], - ring->semaphore_seqno[1]); - if (HAS_VEBOX(dev)) { + ee->semaphore_mboxes[1], + ee->semaphore_seqno[1]); + if (HAS_VEBOX(m->i915)) { err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[2], - ring->semaphore_seqno[2]); + ee->semaphore_mboxes[2], + ee->semaphore_seqno[2]); } } - if (USES_PPGTT(dev)) { - err_printf(m, " GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode); + if (USES_PPGTT(m->i915)) { + err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); - if (INTEL_INFO(dev)->gen >= 8) { + if (INTEL_GEN(m->i915) >= 8) { int i; for (i = 0; i < 4; i++) err_printf(m, " PDP%d: 0x%016llx\n", - i, ring->vm_info.pdp[i]); + i, ee->vm_info.pdp[i]); } else { err_printf(m, " PP_DIR_BASE: 0x%08x\n", - ring->vm_info.pp_dir_base); + ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ring->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ring->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ring->waiting)); - err_printf(m, " ring->head: 0x%08x\n", ring->cpu_ring_head); - err_printf(m, " ring->tail: 0x%08x\n", ring->cpu_ring_tail); + err_printf(m, " seqno: 0x%08x\n", ee->seqno); + err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); + err_printf(m, " waiting: %s\n", yesno(ee->waiting)); + err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); + err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck: %s [%d]\n", - hangcheck_action_to_str(ring->hangcheck_action), - ring->hangcheck_score); + hangcheck_action_to_str(ee->hangcheck_action), + ee->hangcheck_score); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -348,17 +343,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); max_hangcheck_score = 0; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score > max_hangcheck_score) - max_hangcheck_score = error->ring[i].hangcheck_score; + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score > max_hangcheck_score) + max_hangcheck_score = error->engine[i].hangcheck_score; } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score == max_hangcheck_score && - error->ring[i].pid != -1) { + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score == max_hangcheck_score && + error->engine[i].pid != -1) { err_printf(m, "Active process (on ring %s): %s [%d]\n", - ring_str(i), - error->ring[i].comm, - error->ring[i].pid); + engine_str(i), + error->engine[i].comm, + error->engine[i].pid); } } err_printf(m, "Reset count: %u\n", error->reset_count); @@ -414,8 +409,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (IS_GEN7(dev)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); - for (i = 0; i < ARRAY_SIZE(error->ring); i++) - i915_ring_error_state(m, dev, error, i); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].engine_id != -1) + error_print_engine(m, &error->engine[i]); + } for (i = 0; i < error->vm_count; i++) { err_printf(m, "vm[%d]\n", i); @@ -429,21 +426,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count[i]); } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - obj = error->ring[i].batchbuffer; + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i].name); - if (error->ring[i].pid != -1) + if (ee->pid != -1) err_printf(m, " (submitted by %s [%d])", - error->ring[i].comm, - error->ring[i].pid); + ee->comm, + ee->pid); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - obj = error->ring[i].wa_batchbuffer; + obj = ee->wa_batchbuffer; if (obj) { err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n", dev_priv->engine[i].name, @@ -451,38 +450,38 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, obj); } - if (error->ring[i].num_requests) { + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i].name, - error->ring[i].num_requests); - for (j = 0; j < error->ring[i].num_requests; j++) { + ee->num_requests); + for (j = 0; j < ee->num_requests; j++) { err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", - error->ring[i].requests[j].seqno, - error->ring[i].requests[j].jiffies, - error->ring[i].requests[j].tail); + ee->requests[j].seqno, + ee->requests[j].jiffies, + ee->requests[j].tail); } } - if (error->ring[i].num_waiters) { + if (ee->num_waiters) { err_printf(m, "%s --- %d waiters\n", dev_priv->engine[i].name, - error->ring[i].num_waiters); - for (j = 0; j < error->ring[i].num_waiters; j++) { + ee->num_waiters); + for (j = 0; j < ee->num_waiters; j++) { err_printf(m, " seqno 0x%08x for %s [%d]\n", - error->ring[i].waiters[j].seqno, - error->ring[i].waiters[j].comm, - error->ring[i].waiters[j].pid); + ee->waiters[j].seqno, + ee->waiters[j].comm, + ee->waiters[j].pid); } } - if ((obj = error->ring[i].ringbuffer)) { + if ((obj = ee->ringbuffer)) { err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - if ((obj = error->ring[i].hws_page)) { + if ((obj = ee->hws_page)) { u64 hws_offset = obj->gtt_offset; u32 *hws_page = &obj->pages[0][0]; @@ -504,7 +503,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - obj = error->ring[i].wa_ctx; + obj = ee->wa_ctx; if (obj) { u64 wa_ctx_offset = obj->gtt_offset; u32 *wa_ctx_page = &obj->pages[0][0]; @@ -526,7 +525,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->ring[i].ctx)) { + if ((obj = ee->ctx)) { err_printf(m, "%s --- HW Context = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); @@ -611,15 +610,18 @@ static void i915_error_state_free(struct kref *error_ref) typeof(*error), ref); int i; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - i915_error_object_free(error->ring[i].batchbuffer); - i915_error_object_free(error->ring[i].wa_batchbuffer); - i915_error_object_free(error->ring[i].ringbuffer); - i915_error_object_free(error->ring[i].hws_page); - i915_error_object_free(error->ring[i].ctx); - i915_error_object_free(error->ring[i].wa_ctx); - kfree(error->ring[i].requests); - kfree(error->ring[i].waiters); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + i915_error_object_free(ee->batchbuffer); + i915_error_object_free(ee->wa_batchbuffer); + i915_error_object_free(ee->ringbuffer); + i915_error_object_free(ee->hws_page); + i915_error_object_free(ee->ctx); + i915_error_object_free(ee->wa_ctx); + + kfree(ee->requests); + kfree(ee->waiters); } i915_error_object_free(error->semaphore_obj); @@ -667,14 +669,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv, if (i915_is_ggtt(vm)) vma = i915_gem_obj_to_ggtt(src); use_ggtt = (src->cache_level == I915_CACHE_NONE && - vma && (vma->bound & GLOBAL_BIND) && + vma && (vma->flags & I915_VMA_GLOBAL_BIND) && reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); /* Cannot access stolen address directly, try to use the aperture */ if (src->stolen) { use_ggtt = true; - if (!(vma && vma->bound & GLOBAL_BIND)) + if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND)) goto unwind; reloc_offset = i915_gem_obj_ggtt_offset(src); @@ -740,6 +742,24 @@ unwind: #define i915_error_ggtt_object_create(dev_priv, src) \ i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base) +/* The error capture is special as tries to run underneath the normal + * locking rules - so we use the raw version of the i915_gem_active lookup. + */ +static inline uint32_t +__active_get_seqno(struct i915_gem_active *active) +{ + return i915_gem_request_get_seqno(__i915_gem_active_peek(active)); +} + +static inline int +__active_get_engine_id(struct i915_gem_active *active) +{ + struct intel_engine_cs *engine; + + engine = i915_gem_request_get_engine(__i915_gem_active_peek(active)); + return engine ? engine->id : -1; +} + static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { @@ -748,9 +768,12 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; + for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); - err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); + err->rseqno[i] = __active_get_seqno(&obj->last_read[i]); + err->wseqno = __active_get_seqno(&obj->last_write); + err->engine = __active_get_engine_id(&obj->last_write); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -758,12 +781,10 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->pinned = 0; if (i915_gem_obj_is_pinned(obj)) err->pinned = 1; - err->tiling = obj->tiling_mode; + err->tiling = i915_gem_object_get_tiling(obj); err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_write_req ? - i915_gem_request_get_engine(obj->last_write_req)->id : -1; err->cache_level = obj->cache_level; } @@ -797,7 +818,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, break; list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) + if (vma->vm == vm && i915_vma_is_pinned(vma)) capture_bo(err++, vma); } @@ -815,7 +836,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error, - int *ring_id) + int *engine_id) { uint32_t error_code = 0; int i; @@ -826,11 +847,11 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, * strictly a client bug. Use instdone to differentiate those some. */ for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) { - if (ring_id) - *ring_id = i; + if (error->engine[i].hangcheck_action == HANGCHECK_HUNG) { + if (engine_id) + *engine_id = i; - return error->ring[i].ipehr ^ error->ring[i].instdone; + return error->engine[i].ipehr ^ error->engine[i].instdone; } } @@ -855,21 +876,16 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, } -static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; struct intel_engine_cs *to; enum intel_engine_id id; - if (!i915_semaphore_is_enabled(dev_priv)) - return; - if (!error->semaphore_obj) - error->semaphore_obj = - i915_error_ggtt_object_create(dev_priv, - dev_priv->semaphore_obj); + return; for_each_engine_id(to, dev_priv, id) { int idx; @@ -879,42 +895,43 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, if (engine == to) continue; - signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) - / 4; + signal_offset = + (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; tmp = error->semaphore_obj->pages[0]; - idx = intel_ring_sync_index(engine, to); + idx = intel_engine_sync_index(engine, to); - ering->semaphore_mboxes[idx] = tmp[signal_offset]; - ering->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; + ee->semaphore_mboxes[idx] = tmp[signal_offset]; + ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; } } -static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void gen6_record_semaphore_state(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { - ering->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); - ering->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); - ering->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; - ering->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; + struct drm_i915_private *dev_priv = engine->i915; + + ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); + ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); + ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; + ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; if (HAS_VEBOX(dev_priv)) { - ering->semaphore_mboxes[2] = + ee->semaphore_mboxes[2] = I915_READ(RING_SYNC_2(engine->mmio_base)); - ering->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; + ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; } } -static void engine_record_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_waiters(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_error_waiter *waiter; struct rb_node *rb; int count; - ering->num_waiters = 0; - ering->waiters = NULL; + ee->num_waiters = 0; + ee->waiters = NULL; spin_lock(&b->lock); count = 0; @@ -930,7 +947,7 @@ static void engine_record_waiters(struct intel_engine_cs *engine, if (!waiter) return; - ering->waiters = waiter; + ee->waiters = waiter; spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -941,55 +958,55 @@ static void engine_record_waiters(struct intel_engine_cs *engine, waiter->seqno = w->seqno; waiter++; - if (++ering->num_waiters == count) + if (++ee->num_waiters == count) break; } spin_unlock(&b->lock); } -static void i915_record_ring_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_registers(struct drm_i915_error_state *error, + struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; + if (INTEL_GEN(dev_priv) >= 6) { - ering->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ering->fault_reg = I915_READ(RING_FAULT_REG(engine)); + ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); if (INTEL_GEN(dev_priv) >= 8) - gen8_record_semaphore_state(dev_priv, error, engine, - ering); + gen8_record_semaphore_state(error, engine, ee); else - gen6_record_semaphore_state(dev_priv, engine, ering); + gen6_record_semaphore_state(engine, ee); } if (INTEL_GEN(dev_priv) >= 4) { - ering->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); - ering->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); - ering->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - ering->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); - ering->instps = I915_READ(RING_INSTPS(engine->mmio_base)); - ering->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); + ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); + ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); + ee->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); + ee->instps = I915_READ(RING_INSTPS(engine->mmio_base)); + ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); if (INTEL_GEN(dev_priv) >= 8) { - ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; - ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; + ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; + ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; } - ering->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); + ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); } else { - ering->faddr = I915_READ(DMA_FADD_I8XX); - ering->ipeir = I915_READ(IPEIR); - ering->ipehr = I915_READ(IPEHR); - ering->instdone = I915_READ(GEN2_INSTDONE); + ee->faddr = I915_READ(DMA_FADD_I8XX); + ee->ipeir = I915_READ(IPEIR); + ee->ipehr = I915_READ(IPEHR); + ee->instdone = I915_READ(GEN2_INSTDONE); } - ering->waiting = intel_engine_has_waiter(engine); - ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); - ering->acthd = intel_ring_get_active_head(engine); - ering->seqno = intel_engine_get_seqno(engine); - ering->last_seqno = engine->last_submitted_seqno; - ering->start = I915_READ_START(engine); - ering->head = I915_READ_HEAD(engine); - ering->tail = I915_READ_TAIL(engine); - ering->ctl = I915_READ_CTL(engine); + ee->waiting = intel_engine_has_waiter(engine); + ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); + ee->acthd = intel_engine_get_active_head(engine); + ee->seqno = intel_engine_get_seqno(engine); + ee->last_seqno = engine->last_submitted_seqno; + ee->start = I915_READ_START(engine); + ee->head = I915_READ_HEAD(engine); + ee->tail = I915_READ_TAIL(engine); + ee->ctl = I915_READ_CTL(engine); if (I915_NEED_GFX_HWS(dev_priv)) { i915_reg_t mmio; @@ -1017,29 +1034,29 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, mmio = RING_HWS_PGA(engine->mmio_base); } - ering->hws = I915_READ(mmio); + ee->hws = I915_READ(mmio); } - ering->hangcheck_score = engine->hangcheck.score; - ering->hangcheck_action = engine->hangcheck.action; + ee->hangcheck_score = engine->hangcheck.score; + ee->hangcheck_action = engine->hangcheck.action; if (USES_PPGTT(dev_priv)) { int i; - ering->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); + ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); if (IS_GEN6(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE_READ(engine)); else if (IS_GEN7(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE(engine)); else if (INTEL_GEN(dev_priv) >= 8) for (i = 0; i < 4; i++) { - ering->vm_info.pdp[i] = + ee->vm_info.pdp[i] = I915_READ(GEN8_RING_PDP_UDW(engine, i)); - ering->vm_info.pdp[i] <<= 32; - ering->vm_info.pdp[i] |= + ee->vm_info.pdp[i] <<= 32; + ee->vm_info.pdp[i] |= I915_READ(GEN8_RING_PDP_LDW(engine, i)); } } @@ -1048,7 +1065,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, static void i915_gem_record_active_context(struct intel_engine_cs *engine, struct drm_i915_error_state *error, - struct drm_i915_error_ring *ering) + struct drm_i915_error_engine *ee) { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_object *obj; @@ -1062,7 +1079,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine, continue; if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { - ering->ctx = i915_error_ggtt_object_create(dev_priv, obj); + ee->ctx = i915_error_ggtt_object_create(dev_priv, obj); break; } } @@ -1075,23 +1092,31 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_gem_request *request; int i, count; + if (dev_priv->semaphore_obj) { + error->semaphore_obj = + i915_error_ggtt_object_create(dev_priv, + dev_priv->semaphore_obj); + } + for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = &dev_priv->engine[i]; + struct drm_i915_error_engine *ee = &error->engine[i]; - error->ring[i].pid = -1; + ee->pid = -1; + ee->engine_id = -1; if (!intel_engine_initialized(engine)) continue; - error->ring[i].valid = true; + ee->engine_id = i; - i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); - engine_record_waiters(engine, &error->ring[i]); + error_record_engine_registers(error, engine, ee); + error_record_engine_waiters(engine, ee); request = i915_gem_find_active_request(engine); if (request) { struct i915_address_space *vm; - struct intel_ringbuffer *rb; + struct intel_ring *ring; vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; @@ -1100,15 +1125,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, * as the simplest method to avoid being overwritten * by userspace. */ - error->ring[i].batchbuffer = + ee->batchbuffer = i915_error_object_create(dev_priv, request->batch_obj, vm); if (HAS_BROKEN_CS_TLB(dev_priv)) - error->ring[i].wa_batchbuffer = + ee->wa_batchbuffer = i915_error_ggtt_object_create(dev_priv, - engine->scratch.obj); + engine->scratch.obj); if (request->pid) { struct task_struct *task; @@ -1116,8 +1141,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, rcu_read_lock(); task = pid_task(request->pid, PIDTYPE_PID); if (task) { - strcpy(error->ring[i].comm, task->comm); - error->ring[i].pid = task->pid; + strcpy(ee->comm, task->comm); + ee->pid = task->pid; } rcu_read_unlock(); } @@ -1125,44 +1150,40 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; - rb = request->ringbuf; - error->ring[i].cpu_ring_head = rb->head; - error->ring[i].cpu_ring_tail = rb->tail; - error->ring[i].ringbuffer = + ring = request->ring; + ee->cpu_ring_head = ring->head; + ee->cpu_ring_tail = ring->tail; + ee->ringbuffer = i915_error_ggtt_object_create(dev_priv, - rb->obj); + ring->obj); } - error->ring[i].hws_page = + ee->hws_page = i915_error_ggtt_object_create(dev_priv, engine->status_page.obj); - if (engine->wa_ctx.obj) { - error->ring[i].wa_ctx = - i915_error_ggtt_object_create(dev_priv, - engine->wa_ctx.obj); - } + ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, + engine->wa_ctx.obj); - i915_gem_record_active_context(engine, error, &error->ring[i]); + i915_gem_record_active_context(engine, error, ee); count = 0; - list_for_each_entry(request, &engine->request_list, list) + list_for_each_entry(request, &engine->request_list, link) count++; - error->ring[i].num_requests = count; - error->ring[i].requests = - kcalloc(count, sizeof(*error->ring[i].requests), - GFP_ATOMIC); - if (error->ring[i].requests == NULL) { - error->ring[i].num_requests = 0; + ee->num_requests = count; + ee->requests = + kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); + if (!ee->requests) { + ee->num_requests = 0; continue; } count = 0; - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { struct drm_i915_error_request *erq; - if (count >= error->ring[i].num_requests) { + if (count >= ee->num_requests) { /* * If the ring request list was changed in * between the point where the error request @@ -1181,8 +1202,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, break; } - erq = &error->ring[i].requests[count++]; - erq->seqno = request->seqno; + erq = &ee->requests[count++]; + erq->seqno = request->fence.seqno; erq->jiffies = request->emitted_jiffies; erq->tail = request->postfix; } @@ -1209,7 +1230,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) + if (vma->vm == vm && i915_vma_is_pinned(vma)) i++; } error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; @@ -1352,20 +1373,20 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, const char *error_msg) { u32 ecode; - int ring_id = -1, len; + int engine_id = -1, len; - ecode = i915_error_generate_code(dev_priv, error, &ring_id); + ecode = i915_error_generate_code(dev_priv, error, &engine_id); len = scnprintf(error->error_msg, sizeof(error->error_msg), "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), ring_id, ecode); + INTEL_GEN(dev_priv), engine_id, ecode); - if (ring_id != -1 && error->ring[ring_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].pid != -1) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->ring[ring_id].comm, - error->ring[ring_id].pid); + error->engine[engine_id].comm, + error->engine[engine_id].pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e029db6a..03a5cef353eb 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -363,7 +363,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (engine->guc_id << GUC_ELC_ENGINE_OFFSET); - obj = ce->ringbuf->obj; + obj = ce->ring->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); lrc->ring_begin = gfx_addr; @@ -506,7 +506,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, rq->engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->seqno; + wqi->fence_id = rq->fence.seqno; kunmap_atomic(base); } @@ -585,7 +585,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) * The only error here arises if the doorbell hardware isn't functioning * as expected, which really shouln't happen. */ -int i915_guc_submit(struct drm_i915_gem_request *rq) +static void i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; @@ -601,9 +601,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) client->b_fail += 1; guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->seqno; - - return b_ret; + guc->last_seqno[engine_id] = rq->fence.seqno; } /* @@ -633,13 +631,13 @@ gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) return NULL; if (i915_gem_object_get_pages(obj)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } - if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { - drm_gem_object_unreference(&obj->base); + if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { + i915_gem_object_put(obj); return NULL; } @@ -661,7 +659,7 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) if (i915_gem_obj_is_pinned(obj)) i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } static void @@ -992,6 +990,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; + struct intel_engine_cs *engine; /* client for execbuf submission */ client = guc_client_alloc(dev_priv, @@ -1006,6 +1005,10 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) host2guc_sample_forcewake(guc, client); guc_init_doorbell_hw(guc); + /* Take over from manual control of ELSP (execlists) */ + for_each_engine(engine, dev_priv) + engine->submit_request = i915_guc_submit; + return 0; } @@ -1013,6 +1016,12 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + if (!guc->execbuf_client) + return; + + /* Revert back to manual ELSP submission */ + intel_execlists_enable_submission(dev_priv); + guc_client_free(dev_priv, guc->execbuf_client); guc->execbuf_client = NULL; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1c2aec392412..591f452ece68 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -656,12 +656,6 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) * of horizontal active on the first line of vertical active */ -static u32 i8xx_get_vblank_counter(struct drm_device *dev, unsigned int pipe) -{ - /* Gen2 doesn't have a hardware frame counter */ - return 0; -} - /* Called from drm generic code, passed a 'crtc', which * we use as a pipe index */ @@ -1105,9 +1099,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.cur_freq; min = dev_priv->rps.min_freq_softlimit; max = dev_priv->rps.max_freq_softlimit; - - if (client_boost) { - new_delay = dev_priv->rps.max_freq_softlimit; + if (client_boost || any_waiters(dev_priv)) + max = dev_priv->rps.max_freq; + if (client_boost && new_delay < dev_priv->rps.boost_freq) { + new_delay = dev_priv->rps.boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1122,7 +1117,7 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.efficient_freq; adj = 0; } - } else if (any_waiters(dev_priv)) { + } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) @@ -2804,13 +2799,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) } static bool -ring_idle(struct intel_engine_cs *engine, u32 seqno) -{ - return i915_seqno_passed(seqno, - READ_ONCE(engine->last_submitted_seqno)); -} - -static bool ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { if (INTEL_GEN(engine->i915) >= 8) { @@ -2859,6 +2847,7 @@ static struct intel_engine_cs * semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) { struct drm_i915_private *dev_priv = engine->i915; + void __iomem *vaddr; u32 cmd, ipehr, head; u64 offset = 0; int i, backwards; @@ -2897,6 +2886,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) */ head = I915_READ_HEAD(engine) & HEAD_ADDR; backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; + vaddr = (void __iomem *)engine->buffer->vaddr; for (i = backwards; i; --i) { /* @@ -2907,7 +2897,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) head &= engine->buffer->size - 1; /* This here seems to blow up */ - cmd = ioread32(engine->buffer->virtual_start + head); + cmd = ioread32(vaddr + head); if (cmd == ipehr) break; @@ -2917,11 +2907,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) if (!i) return NULL; - *seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1; + *seqno = ioread32(vaddr + head + 4) + 1; if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(engine->buffer->virtual_start + head + 12); + offset = ioread32(vaddr + head + 12); offset <<= 32; - offset = ioread32(engine->buffer->virtual_start + head + 8); + offset |= ioread32(vaddr + head + 8); } return semaphore_wait_to_signaller_ring(engine, ipehr, offset); } @@ -2990,7 +2980,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) return stuck; } -static enum intel_ring_hangcheck_action +static enum intel_engine_hangcheck_action head_stuck(struct intel_engine_cs *engine, u64 acthd) { if (acthd != engine->hangcheck.acthd) { @@ -3008,11 +2998,11 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static enum intel_ring_hangcheck_action -ring_stuck(struct intel_engine_cs *engine, u64 acthd) +static enum intel_engine_hangcheck_action +engine_stuck(struct intel_engine_cs *engine, u64 acthd) { struct drm_i915_private *dev_priv = engine->i915; - enum intel_ring_hangcheck_action ha; + enum intel_engine_hangcheck_action ha; u32 tmp; ha = head_stuck(engine, acthd); @@ -3121,14 +3111,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); - acthd = intel_ring_get_active_head(engine); + acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); /* Reset stuck interrupts between batch advances */ user_interrupts = 0; if (engine->hangcheck.seqno == seqno) { - if (ring_idle(engine, seqno)) { + if (!intel_engine_is_active(engine)) { engine->hangcheck.action = HANGCHECK_IDLE; if (busy) { /* Safeguard against driver failure */ @@ -3137,13 +3127,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } } else { /* We always increment the hangcheck score - * if the ring is busy and still processing + * if the engine is busy and still processing * the same request, so that no single request * can run indefinitely (such as a chain of * batches). The only time we do not increment * the hangcheck score on this ring, if this - * ring is in a legitimate wait for another - * ring. In that case the waiting ring is a + * engine is in a legitimate wait for another + * engine. In that case the waiting engine is a * victim and we want to be sure we catch the * right culprit. Then every time we do kick * the ring, add a small increment to the @@ -3151,8 +3141,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * being repeatedly kicked and so responsible * for stalling the machine. */ - engine->hangcheck.action = ring_stuck(engine, - acthd); + engine->hangcheck.action = + engine_stuck(engine, acthd); switch (engine->hangcheck.action) { case HANGCHECK_IDLE: @@ -4542,8 +4532,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv) i915_hangcheck_elapsed); if (IS_GEN2(dev_priv)) { + /* Gen2 doesn't have a hardware frame counter */ dev->max_vblank_count = 0; - dev->driver->get_vblank_counter = i8xx_get_vblank_counter; + dev->driver->get_vblank_counter = drm_vblank_no_hw_counter; } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ dev->driver->get_vblank_counter = g4x_get_vblank_counter; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 949c01686a66..2587b1bd41f4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -173,6 +173,7 @@ static const struct intel_device_info intel_pineview_info = { .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2, .need_gfx_hws = 1, .has_hotplug = 1, .has_overlay = 1, + .ring_mask = RENDER_RING, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce14fe09d962..f38a5e20bbee 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,13 +186,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) -#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) -#define RING_PP_DIR_BASE_READ(ring) _MMIO((ring)->mmio_base+0x518) -#define RING_PP_DIR_DCLV(ring) _MMIO((ring)->mmio_base+0x220) +#define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) +#define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) +#define RING_PP_DIR_DCLV(engine) _MMIO((engine)->mmio_base+0x220) #define PP_DIR_DCLV_2G 0xffffffff -#define GEN8_RING_PDP_UDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4) -#define GEN8_RING_PDP_LDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8) +#define GEN8_RING_PDP_UDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8 + 4) +#define GEN8_RING_PDP_LDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8) #define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8) #define GEN8_RPCS_ENABLE (1 << 31) @@ -1536,6 +1536,7 @@ enum skl_disp_power_wells { #define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) /* Balance leg disable bits */ #define BALANCE_LEG_DISABLE_SHIFT 23 +#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port))) /* * Fence registers @@ -1647,7 +1648,7 @@ enum skl_disp_power_wells { #define ARB_MODE_BWGTLB_DISABLE (1<<9) #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(ring) _MMIO(0x4094 + 0x100*(ring)->id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) @@ -1845,7 +1846,7 @@ enum skl_disp_power_wells { #define GFX_MODE _MMIO(0x2520) #define GFX_MODE_GEN7 _MMIO(0x229c) -#define RING_MODE_GEN7(ring) _MMIO((ring)->mmio_base+0x29c) +#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base+0x29c) #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_INTERRUPT_STEERING (1<<14) #define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) @@ -6132,6 +6133,7 @@ enum { # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +# define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12) # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) @@ -6958,6 +6960,9 @@ enum { #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) #define VLV_SPAREG2H _MMIO(0xA194) +#define GEN9_PWRGT_DOMAIN_STATUS _MMIO(0xA2A0) +#define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) +#define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) @@ -7485,6 +7490,7 @@ enum { #define _DDI_BUF_TRANS_A 0x64E00 #define _DDI_BUF_TRANS_B 0x64E60 #define DDI_BUF_TRANS_LO(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8) +#define DDI_BUF_BALANCE_LEG_ENABLE (1 << 31) #define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4) /* Sideband Interface (SBI) is programmed indirectly, via diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61829e54f93..f1ffde7f7c0b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -271,8 +271,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct drm_i915_private *dev_priv = to_i915(dev); int ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -303,19 +301,46 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.cur_freq)); +} - intel_runtime_pm_get(dev_priv); +static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) +{ + struct drm_minor *minor = dev_to_drm_minor(kdev); + struct drm_i915_private *dev_priv = to_i915(minor->dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.boost_freq)); +} + +static ssize_t gt_boost_freq_mhz_store(struct device *kdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_minor *minor = dev_to_drm_minor(kdev); + struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + u32 val; + ssize_t ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + /* Validate against (static) hardware limits */ + val = intel_freq_opcode(dev_priv, val); + if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + return -EINVAL; mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq); + dev_priv->rps.boost_freq = val; mutex_unlock(&dev_priv->rps.hw_lock); - intel_runtime_pm_put(dev_priv); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return count; } static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, @@ -325,9 +350,9 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - return snprintf(buf, PAGE_SIZE, - "%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -335,15 +360,10 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -360,8 +380,6 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -403,15 +421,10 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -428,8 +441,6 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -465,6 +476,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); +static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); @@ -498,6 +510,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr static const struct attribute *gen6_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, @@ -509,6 +522,7 @@ static const struct attribute *gen6_attrs[] = { static const struct attribute *vlv_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 534154e05fbe..178798002a73 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags), - TP_ARGS(dev, size, align, flags), + TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) + __field(struct i915_address_space *, vm) __field(u32, size) __field(u32, align) - __field(unsigned, flags) + __field(unsigned int, flags) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = vm->dev->primary->index; + __entry->vm = vm; __entry->size = size; __entry->align = align; __entry->flags = flags; ), - TP_printk("dev=%d, size=%d, align=%d %s", - __entry->dev, __entry->size, __entry->align, + TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); @@ -449,10 +451,9 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - struct drm_i915_gem_request *req), - TP_ARGS(to_req, from, req), + TP_PROTO(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from), + TP_ARGS(to, from), TP_STRUCT__entry( __field(u32, dev) @@ -463,9 +464,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, TP_fast_assign( __entry->dev = from->i915->drm.primary->index; - __entry->sync_from = from->id; - __entry->sync_to = to_req->engine->id; - __entry->seqno = i915_gem_request_get_seqno(req); + __entry->sync_from = from->engine->id; + __entry->sync_to = to->engine->id; + __entry->seqno = from->fence.seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", @@ -488,9 +489,9 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - intel_engine_enable_signaling(req); + fence_enable_sw_signaling(&req->fence); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", @@ -533,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, ring=%u, seqno=%u", @@ -595,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->blocking = mutex_is_locked(&req->i915->drm.struct_mutex); ), diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a0e701..142bac976919 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -97,6 +97,7 @@ static struct _balloon_info_ bl_info; /** * intel_vgt_deballoon - deballoon reserved graphics address trunks + * @dev_priv: i915 device private data * * This function is called to deallocate the ballooned-out graphic memory, when * driver is unloaded or when ballooning fails. @@ -138,7 +139,7 @@ static int vgt_balloon_space(struct drm_mm *mm, /** * intel_vgt_balloon - balloon out reserved graphics address trunks - * @dev: drm device + * @dev_priv: i915 device private data * * This function is called at the initialization stage, to balloon out the * graphic address space allocated to other vGPUs, by marking these spaces as diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 6700a7be7f78..d32f586f9c05 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) return; + i915_audio_component_get_power(dev); + /* * Enable/disable generating the codec wake signal, overriding the * internal logic to generate the codec wake to controller. @@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, I915_WRITE(HSW_AUD_CHICKENBIT, tmp); usleep_range(1000, 1500); } + + i915_audio_component_put_power(dev); } /* Get CDCLK in kHz */ @@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, !IS_HASWELL(dev_priv)) return 0; + i915_audio_component_get_power(dev); mutex_lock(&dev_priv->av_mutex); /* 1. get the pipe */ intel_encoder = dev_priv->dig_port_map[port]; @@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, unlock: mutex_unlock(&dev_priv->av_mutex); + i915_audio_component_put_power(dev); return err; } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b074f3d6d127..90867446f1a5 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -51,6 +51,13 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; + /* Make sure the current hangcheck doesn't falsely accuse a just + * started irq handler from missing an interrupt (because the + * interrupt count still matches the stale value from when + * the irq handler was disabled, many hangchecks ago). + */ + engine->breadcrumbs.irq_wakeups++; + spin_lock_irq(&engine->i915->irq_lock); engine->irq_enable(engine); spin_unlock_irq(&engine->i915->irq_lock); @@ -436,6 +443,7 @@ static int intel_breadcrumbs_signaler(void *arg) */ intel_engine_remove_wait(engine, &request->signaling.wait); + fence_signal(&request->fence); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -452,7 +460,7 @@ static int intel_breadcrumbs_signaler(void *arg) rb_erase(&request->signaling.node, &b->signals); spin_unlock(&b->lock); - i915_gem_request_unreference(request); + i915_gem_request_put(request); } else { if (kthread_should_stop()) break; @@ -472,18 +480,14 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - if (unlikely(READ_ONCE(request->signaling.wait.tsk))) - return; - - spin_lock(&b->lock); - if (unlikely(request->signaling.wait.tsk)) { - wakeup = false; - goto unlock; - } + /* locked by fence_enable_sw_signaling() */ + assert_spin_locked(&request->lock); request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->seqno; - i915_gem_request_reference(request); + request->signaling.wait.seqno = request->fence.seqno; + i915_gem_request_get(request); + + spin_lock(&b->lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. As per usual, only the oldest @@ -504,8 +508,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->seqno, - to_signaler(parent)->seqno)) { + if (i915_seqno_passed(request->fence.seqno, + to_signaler(parent)->fence.seqno)) { p = &parent->rb_right; first = false; } else { @@ -517,7 +521,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) if (first) smp_store_mb(b->first_signal, request); -unlock: spin_unlock(&b->lock); if (wakeup) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 3edb9580928e..fb27d187876c 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -32,13 +32,6 @@ * onwards to drive newly added DMC (Display microcontroller) in display * engine to save and restore the state of display engine when it enter into * low-power state and comes back to normal. - * - * Firmware loading status will be one of the below states: FW_UNINITIALIZED, - * FW_LOADED, FW_FAILED. - * - * Once the firmware is written into the registers status will be moved from - * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will - * be moved to FW_FAILED. */ #define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index dd1d6fe12297..c2df4e429b19 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { { 0x0000201B, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x1 }, { 0x80009010, 0x000000C0, 0x1 }, { 0x0000201B, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x1 }, @@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { { 0x00000018, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x3 }, { 0x80009010, 0x000000C0, 0x3 }, { 0x00000018, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x3 }, @@ -301,45 +301,34 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ }; -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type); - -static void ddi_get_encoder_port(struct intel_encoder *intel_encoder, - struct intel_digital_port **dig_port, - enum port *port) +enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - - switch (intel_encoder->type) { + switch (encoder->type) { case INTEL_OUTPUT_DP_MST: - *dig_port = enc_to_mst(encoder)->primary; - *port = (*dig_port)->port; - break; - default: - WARN(1, "Invalid DDI encoder type %d\n", intel_encoder->type); - /* fallthrough and treat as unknown */ + return enc_to_mst(&encoder->base)->primary->port; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_UNKNOWN: - *dig_port = enc_to_dig_port(encoder); - *port = (*dig_port)->port; - break; + return enc_to_dig_port(&encoder->base)->port; case INTEL_OUTPUT_ANALOG: - *dig_port = NULL; - *port = PORT_E; - break; + return PORT_E; + default: + MISSING_CASE(encoder->type); + return PORT_A; } } -enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) +static const struct ddi_buf_trans * +bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { - struct intel_digital_port *dig_port; - enum port port; - - ddi_get_encoder_port(intel_encoder, &dig_port, &port); - - return port; + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + return bdw_ddi_translations_edp; + } else { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } } static const struct ddi_buf_trans * @@ -388,39 +377,58 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_hdmi_entries; + int hdmi_level; + int hdmi_default_entry; + + hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_BROXTON(dev_priv)) + return hdmi_level; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + hdmi_default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_default_entry; + + return hdmi_level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct - * values in advance. The buffer values are different for FDI and DP modes, - * but the HDMI/DVI fields are shared among those. So we program the DDI - * in either FDI or DP modes only, as HDMI connections will work with both - * of those + * values in advance. This function programs the correct values for + * DP/eDP/FDI use cases. */ -void intel_prepare_ddi_buffer(struct intel_encoder *encoder) +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, - size; - int hdmi_level; - enum port port; + int i, n_dp_entries, n_edp_entries, size; + enum port port = intel_ddi_get_encoder_port(encoder); const struct ddi_buf_trans *ddi_translations_fdi; const struct ddi_buf_trans *ddi_translations_dp; const struct ddi_buf_trans *ddi_translations_edp; - const struct ddi_buf_trans *ddi_translations_hdmi; const struct ddi_buf_trans *ddi_translations; - port = intel_ddi_get_encoder_port(encoder); - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - - if (IS_BROXTON(dev_priv)) { - if (encoder->type != INTEL_OUTPUT_HDMI) - return; - - /* Vswing programming for HDMI */ - bxt_ddi_vswing_sequence(dev_priv, hdmi_level, port, - INTEL_OUTPUT_HDMI); + if (IS_BROXTON(dev_priv)) return; - } if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ddi_translations_fdi = NULL; @@ -428,13 +436,10 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_dp(dev_priv, &n_dp_entries); ddi_translations_edp = skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - ddi_translations_hdmi = - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; + /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || - dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = 1<<31; + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && port != PORT_A && port != PORT_E && @@ -443,38 +448,20 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) } else if (IS_BROADWELL(dev_priv)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - - if (dev_priv->vbt.edp.low_vswing) { - ddi_translations_edp = bdw_ddi_translations_edp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - } else { - ddi_translations_edp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } - - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - + ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; - ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_hdmi = bdw_ddi_translations_hdmi; n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } switch (encoder->type) { @@ -483,7 +470,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) size = n_edp_entries; break; case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: ddi_translations = ddi_translations_dp; size = n_dp_entries; break; @@ -501,19 +487,48 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) I915_WRITE(DDI_BUF_TRANS_HI(port, i), ddi_translations[i].trans2); } +} + +/* + * Starting with Haswell, DDI port buffers must be programmed with correct + * values in advance. This function programs the correct values for + * HDMI/DVI use cases. + */ +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 iboost_bit = 0; + int n_hdmi_entries, hdmi_level; + enum port port = intel_ddi_get_encoder_port(encoder); + const struct ddi_buf_trans *ddi_translations_hdmi; - if (encoder->type != INTEL_OUTPUT_HDMI) + if (IS_BROXTON(dev_priv)) return; - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + } else if (IS_BROADWELL(dev_priv)) { + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } else if (IS_HASWELL(dev_priv)) { + ddi_translations_hdmi = hsw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + } else { + WARN(1, "ddi translation table missing\n"); + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } /* Entry 9 is for HDMI: */ - I915_WRITE(DDI_BUF_TRANS_LO(port, i), + I915_WRITE(DDI_BUF_TRANS_LO(port, 9), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); - I915_WRITE(DDI_BUF_TRANS_HI(port, i), + I915_WRITE(DDI_BUF_TRANS_HI(port, 9), ddi_translations_hdmi[hdmi_level].trans2); } @@ -550,7 +565,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) for_each_encoder_on_crtc(dev, crtc, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_ddi_buffer(encoder); + intel_prepare_dp_ddi_buffers(encoder); } /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the @@ -1111,7 +1126,6 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe = intel_crtc->pipe; @@ -1177,29 +1191,15 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; - } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (intel_crtc->config->fdi_lanes - 1) << 1; - } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { - struct intel_dp *intel_dp = &enc_to_mst(encoder)->primary->dp; - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", @@ -1379,14 +1379,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) TRANS_CLK_SEL_DISABLED); } -static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, + enum port port, uint8_t iboost) { + u32 tmp; + + tmp = I915_READ(DISPIO_CR_TX_BMU_CR0); + tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port)); + if (iboost) + tmp |= iboost << BALANCE_LEG_SHIFT(port); + else + tmp |= BALANCE_LEG_DISABLE(port); + I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); +} + +static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum port port = intel_dig_port->port; + int type = encoder->type; const struct ddi_buf_trans *ddi_translations; uint8_t iboost; uint8_t dp_iboost, hdmi_iboost; int n_entries; - u32 reg; /* VBT may override standard boost values */ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; @@ -1428,16 +1444,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, return; } - reg = I915_READ(DISPIO_CR_TX_BMU_CR0); - reg &= ~BALANCE_LEG_MASK(port); - reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port)); + _skl_ddi_set_iboost(dev_priv, port, iboost); - if (iboost) - reg |= iboost << BALANCE_LEG_SHIFT(port); - else - reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port); - - I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg); + if (port == PORT_A && intel_dig_port->max_lanes == 4) + _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, @@ -1568,7 +1578,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_ddi_set_iboost(dev_priv, level, port, encoder->type); + skl_ddi_set_iboost(encoder, level); else if (IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1615,8 +1625,6 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); } - intel_prepare_ddi_buffer(intel_encoder); - if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_edp_panel_on(intel_dp); @@ -1627,6 +1635,8 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + intel_prepare_dp_ddi_buffers(intel_encoder); + intel_dp_set_link_params(intel_dp, crtc->config); intel_ddi_init_dp_buf_reg(intel_encoder); @@ -1637,6 +1647,15 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_stop_link_train(intel_dp); } else if (type == INTEL_OUTPUT_HDMI) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + int level = intel_ddi_hdmi_level(dev_priv, port); + + intel_prepare_hdmi_ddi_buffers(intel_encoder); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_ddi_set_iboost(intel_encoder, level); + else if (IS_BROXTON(dev_priv)) + bxt_ddi_vswing_sequence(dev_priv, level, port, + INTEL_OUTPUT_HDMI); intel_hdmi->set_infoframes(encoder, crtc->config->has_hdmi_sink, @@ -2105,7 +2124,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) val = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; - if (intel_dp->is_mst) + if (intel_dp->link_mst) val |= DP_TP_CTL_MODE_MST; else { val |= DP_TP_CTL_MODE_SST; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8cc361114112..c6f27ab99e8f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -34,6 +34,7 @@ #include <drm/drm_edid.h> #include <drm/drmP.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_gem_dmabuf.h" @@ -2465,9 +2466,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - obj->tiling_mode = plane_config->tiling; - if (obj->tiling_mode == I915_TILING_X) - obj->stride = fb->pitches[0]; + if (plane_config->tiling == I915_TILING_X) + obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; mode_cmd.pixel_format = fb->pixel_format; mode_cmd.width = fb->width; @@ -2488,7 +2488,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return true; out_unref_obj: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return false; } @@ -2593,14 +2593,15 @@ valid_fb: intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h; obj = intel_fb_obj(fb); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); - obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit; + atomic_or(to_intel_plane(primary)->frontbuffer_bit, + &obj->frontbuffer_bits); } static void i9xx_update_primary_plane(struct drm_plane *primary, @@ -2670,8 +2671,7 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, BUG(); } - if (INTEL_INFO(dev)->gen >= 4 && - obj->tiling_mode != I915_TILING_NONE) + if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) @@ -2780,7 +2780,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, BUG(); } - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) @@ -4564,12 +4564,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->base.state); - struct drm_device *dev = crtc->base.dev; struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = drm_atomic_get_existing_plane_state(old_state, primary); - intel_frontbuffer_flip(dev, pipe_config->fb_bits); + intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); crtc->wm.cxsr_allowed = true; @@ -4692,7 +4691,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask * to compute the mask of flip planes precisely. For the time being * consider this a flip to a NULL plane. */ - intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); + intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe)); } static void ironlake_crtc_enable(struct drm_crtc *crtc) @@ -10434,7 +10433,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@ -10945,13 +10944,13 @@ static void intel_unpin_work_fn(struct work_struct *__work) mutex_lock(&dev->struct_mutex); intel_unpin_fb_obj(work->old_fb, primary->state->rotation); - drm_gem_object_unreference(&work->pending_flip_obj->base); - - if (work->flip_queued_req) - i915_gem_request_assign(&work->flip_queued_req, NULL); + i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); + i915_gem_request_put(work->flip_queued_req); + + intel_frontbuffer_flip_complete(to_i915(dev), + to_intel_plane(primary)->frontbuffer_bit); intel_fbc_post_update(crtc); drm_framebuffer_unreference(work->old_fb); @@ -11116,7 +11115,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11132,13 +11131,13 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, 0); /* aux display base address, unused */ + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, 0); /* aux display base address, unused */ return 0; } @@ -11150,7 +11149,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11163,13 +11162,13 @@ static int intel_gen3_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, MI_NOOP); return 0; } @@ -11181,7 +11180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11195,11 +11194,11 @@ static int intel_gen4_queue_flip(struct drm_device *dev, * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset | - obj->tiling_mode); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | + i915_gem_object_get_tiling(obj)); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -11207,7 +11206,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11219,7 +11218,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11229,10 +11228,10 @@ static int intel_gen6_queue_flip(struct drm_device *dev, if (ret) return ret; - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11242,7 +11241,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11254,7 +11253,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; @@ -11275,7 +11274,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, } len = 4; - if (engine->id == RCS) { + if (req->engine->id == RCS) { len += 6; /* * On Gen 8, SRM is now taking an extra dword to accommodate @@ -11313,30 +11312,30 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * for the RCS also doesn't appear to drop events. Setting the DERRMR * to zero does lead to lockups within MI_DISPLAY_FLIP. */ - if (engine->id == RCS) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE | + if (req->engine->id == RCS) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEC_PRI_FLIP_DONE)); if (IS_GEN8(dev)) - intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT); else - intel_ring_emit(engine, MI_STORE_REGISTER_MEM | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, engine->scratch.gtt_offset + 256); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); if (IS_GEN8(dev)) { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } } - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode)); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, (MI_NOOP)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, (MI_NOOP)); return 0; } @@ -11371,7 +11370,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write_req); + return engine != i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11440,7 +11440,7 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc, dspcntr = I915_READ(reg); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; @@ -11463,9 +11463,9 @@ static void intel_mmio_flip_work_func(struct work_struct *w) struct reservation_object *resv; if (work->flip_queued_req) - WARN_ON(__i915_wait_request(work->flip_queued_req, - false, NULL, - &dev_priv->rps.mmioflips)); + WARN_ON(i915_wait_request(work->flip_queued_req, + false, NULL, + NO_WAITBOOST)); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); @@ -11576,7 +11576,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_flip_work *work; struct intel_engine_cs *engine; bool mmio_flip; - struct drm_i915_gem_request *request = NULL; + struct drm_i915_gem_request *request; int ret; /* @@ -11642,7 +11642,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, /* Reference the objects for the scheduled work. */ drm_framebuffer_reference(work->old_fb); - drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; update_state_fb(crtc->primary); @@ -11650,7 +11649,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_fbc_pre_update(intel_crtc, intel_crtc->config, to_intel_plane_state(primary->state)); - work->pending_flip_obj = obj; + work->pending_flip_obj = i915_gem_object_get(obj); ret = i915_mutex_lock_interruptible(dev); if (ret) @@ -11669,13 +11668,15 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { engine = &dev_priv->engine[BCS]; - if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode) + if (i915_gem_object_get_tiling(obj) != + i915_gem_object_get_tiling(intel_fb_obj(work->old_fb))) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write_req); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11684,22 +11685,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, mmio_flip = use_mmio_flip(engine, obj); - /* When using CS flips, we want to emit semaphores between rings. - * However, when using mmio flips we will create a task to do the - * synchronisation, so all we want here is to pin the framebuffer - * into the display plane and skip any waits. - */ - if (!mmio_flip) { - ret = i915_gem_object_sync(obj, engine, &request); - if (!ret && !request) { - request = i915_gem_request_alloc(engine, NULL); - ret = PTR_ERR_OR_ZERO(request); - } - - if (ret) - goto cleanup_pending; - } - ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); if (ret) goto cleanup_pending; @@ -11712,19 +11697,28 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); - + work->flip_queued_req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); schedule_work(&work->mmio_work); } else { - i915_gem_request_assign(&work->flip_queued_req, request); + request = i915_gem_request_alloc(engine, engine->last_context); + if (IS_ERR(request)) { + ret = PTR_ERR(request); + goto cleanup_unpin; + } + + ret = i915_gem_object_sync(obj, request); + if (ret) + goto cleanup_request; + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, page_flip_flags); if (ret) - goto cleanup_unpin; + goto cleanup_request; intel_mark_page_flip_active(intel_crtc, work); + work->flip_queued_req = i915_gem_request_get(request); i915_add_request_no_flush(request); } @@ -11732,25 +11726,25 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, to_intel_plane(primary)->frontbuffer_bit); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_prepare(dev, + intel_frontbuffer_flip_prepare(to_i915(dev), to_intel_plane(primary)->frontbuffer_bit); trace_i915_flip_request(intel_crtc->plane, obj); return 0; +cleanup_request: + i915_add_request_no_flush(request); cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state->rotation); cleanup_pending: - if (!IS_ERR_OR_NULL(request)) - i915_add_request_no_flush(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: crtc->primary->fb = old_fb; update_state_fb(crtc->primary); - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); drm_framebuffer_unreference(work->old_fb); spin_lock_irq(&dev->event_lock); @@ -12298,6 +12292,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_connector *connector; unsigned int used_ports = 0; + unsigned int used_mst_ports = 0; /* * Walk the connector list instead of the encoder @@ -12334,11 +12329,20 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) return false; used_ports |= port_mask; + break; + case INTEL_OUTPUT_DP_MST: + used_mst_ports |= + 1 << enc_to_mst(&encoder->base)->primary->port; + break; default: break; } } + /* can't mix MST and SST/HDMI on the same port */ + if (used_ports & used_mst_ports) + return false; + return true; } @@ -13506,8 +13510,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); if (ret) { /* Any hang should be swallowed by the wait */ WARN_ON(ret == -EIO); @@ -13619,8 +13623,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); /* EIO should be eaten, and we can't get interrupted in the * worker, and blocking commits have waited already. */ WARN_ON(ret); @@ -13797,19 +13801,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state) { struct drm_plane_state *old_plane_state; struct drm_plane *plane; - struct drm_i915_gem_object *obj, *old_obj; - struct intel_plane *intel_plane; int i; - mutex_lock(&state->dev->struct_mutex); - for_each_plane_in_state(state, plane, old_plane_state, i) { - obj = intel_fb_obj(plane->state->fb); - old_obj = intel_fb_obj(old_plane_state->fb); - intel_plane = to_intel_plane(plane); - - i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit); - } - mutex_unlock(&state->dev->struct_mutex); + for_each_plane_in_state(state, plane, old_plane_state, i) + i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), + intel_fb_obj(plane->state->fb), + to_intel_plane(plane)->frontbuffer_bit); } /** @@ -14038,11 +14035,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, } if (ret == 0) { - struct intel_plane_state *plane_state = - to_intel_plane_state(new_state); - - i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + to_intel_plane_state(new_state)->wait_req = + i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); } return ret; @@ -14063,6 +14058,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; + struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14075,6 +14071,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); + i915_gem_request_assign(&intel_state->wait_req, NULL); i915_gem_request_assign(&old_intel_state->wait_req, NULL); } @@ -14831,7 +14828,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); mutex_lock(&dev->struct_mutex); WARN_ON(!intel_fb->obj->framebuffer_references--); - drm_gem_object_unreference(&intel_fb->obj->base); + i915_gem_object_put(intel_fb->obj); mutex_unlock(&dev->struct_mutex); kfree(intel_fb); } @@ -14920,15 +14917,15 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* Enforce that fb modifier and tiling mode match, but only for * X-tiled. This is needed for FBC. */ - if (!!(obj->tiling_mode == I915_TILING_X) != + if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) != !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); return -EINVAL; } } else { - if (obj->tiling_mode == I915_TILING_X) + if (i915_gem_object_get_tiling(obj) == I915_TILING_X) mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - else if (obj->tiling_mode == I915_TILING_Y) { + else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); return -EINVAL; } @@ -14972,9 +14969,10 @@ static int intel_framebuffer_init(struct drm_device *dev, } if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && - mode_cmd->pitches[0] != obj->stride) { + mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], obj->stride); + mode_cmd->pitches[0], + i915_gem_object_get_stride(obj)); return -EINVAL; } @@ -15068,13 +15066,13 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); + if (!obj) return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@ -15482,7 +15480,6 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); - intel_enable_gt_powersave(dev_priv); } /* diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 21b04c3eda41..8fe2afa5439e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1041,10 +1041,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(txsize > 20)) return -E2BIG; + WARN_ON(!msg->buffer != !msg->size); + if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - else - WARN_ON(msg->size); ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); if (ret > 0) { @@ -1447,7 +1447,7 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) if (WARN_ON(len <= 0)) return 162000; - return rates[rate_to_index(0, rates) - 1]; + return rates[len - 1]; } int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) @@ -1651,6 +1651,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, { intel_dp->link_rate = pipe_config->port_clock; intel_dp->lane_count = pipe_config->lane_count; + intel_dp->link_mst = intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST); } static void intel_dp_prepare(struct intel_encoder *encoder) @@ -3395,84 +3396,67 @@ intel_dp_link_down(struct intel_dp *intel_dp) } static bool -intel_dp_get_dpcd(struct intel_dp *intel_dp) +intel_dp_read_dpcd(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, sizeof(intel_dp->dpcd)) < 0) return false; /* aux transfer failed */ DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd); - if (intel_dp->dpcd[DP_DPCD_REV] == 0) - return false; /* DPCD not present */ + return intel_dp->dpcd[DP_DPCD_REV] != 0; +} - if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count, 1) < 0) - return false; +static bool +intel_edp_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); - /* - * Sink count can change between short pulse hpd hence - * a member variable in intel_dp will track any changes - * between short pulse interrupts. - */ - intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + /* this function is meant to be called only once */ + WARN_ON(intel_dp->dpcd[DP_DPCD_REV] != 0); - /* - * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that - * a dongle is present but no display. Unless we require to know - * if a dongle is present or not, we don't need to update - * downstream port information. So, an early return here saves - * time from performing other operations which are not required. - */ - if (!is_edp(intel_dp) && !intel_dp->sink_count) + if (!intel_dp_read_dpcd(intel_dp)) return false; - /* Check if the panel supports PSR */ - memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd)); - if (is_edp(intel_dp)) { - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, - intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { - dev_priv->psr.sink_support = true; - DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); - } - - if (INTEL_INFO(dev)->gen >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; - - dev_priv->psr.sink_support = true; - drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap, 1); - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? "supported" : "not supported"); - } + if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) + dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & + DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - /* Read the eDP Display control capabilities registers */ - memset(intel_dp->edp_dpcd, 0, sizeof(intel_dp->edp_dpcd)); - if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && - (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, - intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == - sizeof(intel_dp->edp_dpcd))) - DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), - intel_dp->edp_dpcd); - } - - DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", - yesno(intel_dp_source_supports_hbr2(intel_dp)), - yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + /* Check if the panel supports PSR */ + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, + intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + dev_priv->psr.sink_support = true; + DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); + } + + if (INTEL_GEN(dev_priv) >= 9 && + (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { + uint8_t frame_sync_cap; + + dev_priv->psr.sink_support = true; + drm_dp_dpcd_read(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap, 1); + dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; + /* PSR2 needs frame sync as well */ + dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; + DRM_DEBUG_KMS("PSR2 %s on sink", + dev_priv->psr.psr2_support ? "supported" : "not supported"); + } + + /* Read the eDP Display control capabilities registers */ + if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && + drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, + intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) == + sizeof(intel_dp->edp_dpcd))) + DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), + intel_dp->edp_dpcd); /* Intermediate frequency support */ - if (is_edp(intel_dp) && (intel_dp->edp_dpcd[0] >= 0x03)) { /* eDp v1.4 or higher */ + if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */ __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; int i; @@ -3491,7 +3475,36 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } - intel_dp_print_rates(intel_dp); + return true; +} + + +static bool +intel_dp_get_dpcd(struct intel_dp *intel_dp) +{ + if (!intel_dp_read_dpcd(intel_dp)) + return false; + + if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, + &intel_dp->sink_count, 1) < 0) + return false; + + /* + * Sink count can change between short pulse hpd hence + * a member variable in intel_dp will track any changes + * between short pulse interrupts. + */ + intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + + /* + * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that + * a dongle is present but no display. Unless we require to know + * if a dongle is present or not, we don't need to update + * downstream port information. So, an early return here saves + * time from performing other operations which are not required. + */ + if (!is_edp(intel_dp) && !intel_dp->sink_count) + return false; if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT)) @@ -3526,7 +3539,7 @@ intel_dp_probe_oui(struct intel_dp *intel_dp) } static bool -intel_dp_probe_mst(struct intel_dp *intel_dp) +intel_dp_can_mst(struct intel_dp *intel_dp) { u8 buf[1]; @@ -3539,18 +3552,30 @@ intel_dp_probe_mst(struct intel_dp *intel_dp) if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1)) { - if (buf[0] & DP_MST_CAP) { - DRM_DEBUG_KMS("Sink is MST capable\n"); - intel_dp->is_mst = true; - } else { - DRM_DEBUG_KMS("Sink is not MST capable\n"); - intel_dp->is_mst = false; - } - } + if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) + return false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - return intel_dp->is_mst; + return buf[0] & DP_MST_CAP; +} + +static void +intel_dp_configure_mst(struct intel_dp *intel_dp) +{ + if (!i915.enable_dp_mst) + return; + + if (!intel_dp->can_mst) + return; + + intel_dp->is_mst = intel_dp_can_mst(intel_dp); + + if (intel_dp->is_mst) + DRM_DEBUG_KMS("Sink is MST capable\n"); + else + DRM_DEBUG_KMS("Sink is not MST capable\n"); + + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); } static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) @@ -3909,7 +3934,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); - u8 sink_irq_vector; + u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -3936,7 +3961,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, @@ -3980,6 +4006,9 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) connector_status_connected : connector_status_disconnected; } + if (intel_dp_can_mst(intel_dp)) + return connector_status_connected; + /* If no HPD, poke DDC gently */ if (drm_probe_ddc(&intel_dp->aux.ddc)) return connector_status_connected; @@ -4217,8 +4246,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct drm_device *dev = connector->dev; enum drm_connector_status status; enum intel_display_power_domain power_domain; - bool ret; - u8 sink_irq_vector; + u8 sink_irq_vector = 0; power_domain = intel_display_port_aux_power_domain(intel_encoder); intel_display_power_get(to_i915(dev), power_domain); @@ -4252,10 +4280,17 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; + DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", + yesno(intel_dp_source_supports_hbr2(intel_dp)), + yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + + intel_dp_print_rates(intel_dp); + intel_dp_probe_oui(intel_dp); - ret = intel_dp_probe_mst(intel_dp); - if (ret) { + intel_dp_configure_mst(intel_dp); + + if (intel_dp->is_mst) { /* * If we are in MST mode then this connector * won't appear connected or have anything @@ -4290,7 +4325,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, @@ -5186,7 +5222,7 @@ unlock: /** * intel_edp_drrs_invalidate - Disable Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called everytime rendering on the given planes start. @@ -5194,10 +5230,9 @@ unlock: * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. */ -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -5229,7 +5264,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, /** * intel_edp_drrs_flush - Restart Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called every time rendering on the given planes has @@ -5239,10 +5274,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. */ -void intel_edp_drrs_flush(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -5413,14 +5447,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_unlock(intel_dp); /* Cache DPCD and EDID for edp. */ - has_dpcd = intel_dp_get_dpcd(intel_dp); + has_dpcd = intel_edp_init_dpcd(intel_dp); - if (has_dpcd) { - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) - dev_priv->no_aux_handshake = - intel_dp->dpcd[DP_MAX_DOWNSPREAD] & - DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - } else { + if (!has_dpcd) { /* if this fails, presume the device is a ghost */ DRM_INFO("failed to retrieve link info, disabling eDP\n"); goto out_vdd_off; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 68a005d729e9..629337dbca3d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -170,10 +170,10 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) intel_mst->connector = found; if (intel_dp->active_mst_links == 0) { - intel_prepare_ddi_buffer(&intel_dig_port->base); - intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); + intel_prepare_dp_ddi_buffers(&intel_dig_port->base); + intel_dp_set_link_params(intel_dp, intel_crtc->config); intel_ddi_init_dp_buf_reg(&intel_dig_port->base); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9c59521afb18..c29a429cbc45 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -849,6 +849,7 @@ struct intel_dp { int link_rate; uint8_t lane_count; uint8_t sink_count; + bool link_mst; bool has_audio; bool detect_done; enum hdmi_force_audio force_audio; @@ -1104,7 +1105,7 @@ void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config); -void intel_prepare_ddi_buffer(struct intel_encoder *encoder); +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); void hsw_fdi_link_train(struct drm_crtc *crtc); void intel_ddi_init(struct drm_device *dev, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); @@ -1131,21 +1132,10 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -/* intel_frontbuffer.c */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); -void intel_frontbuffer_flip_prepare(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_device *dev, - unsigned frontbuffer_bits); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin); u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format); @@ -1381,11 +1371,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp); void intel_edp_drrs_disable(struct intel_dp *intel_dp); -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits); +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); + struct intel_digital_port *port); void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, @@ -1558,13 +1549,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ void intel_psr_enable(struct intel_dp *intel_dp); void intel_psr_disable(struct intel_dp *intel_dp); -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); void intel_psr_init(struct drm_device *dev); -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); /* intel_runtime_pm.c */ @@ -1664,13 +1655,6 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) atomic_dec(&dev_priv->pm.wakeref_count); } -/* TODO: convert users of these to rely instead on proper RPM refcounting */ -#define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - disable_rpm_wakeref_asserts(dev_priv) - -#define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - enable_rpm_wakeref_asserts(dev_priv) - void intel_runtime_pm_get(struct drm_i915_private *dev_priv); bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); @@ -1696,11 +1680,11 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_update_ring_freq(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c new file mode 100644 index 000000000000..e9b301ae2d0c --- /dev/null +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -0,0 +1,231 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_lrc.h" + +static const struct engine_info { + const char *name; + unsigned exec_id; + unsigned guc_id; + u32 mmio_base; + unsigned irq_shift; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +} intel_engines[] = { + [RCS] = { + .name = "render ring", + .exec_id = I915_EXEC_RENDER, + .guc_id = GUC_RENDER_ENGINE, + .mmio_base = RENDER_RING_BASE, + .irq_shift = GEN8_RCS_IRQ_SHIFT, + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [BCS] = { + .name = "blitter ring", + .exec_id = I915_EXEC_BLT, + .guc_id = GUC_BLITTER_ENGINE, + .mmio_base = BLT_RING_BASE, + .irq_shift = GEN8_BCS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VCS] = { + .name = "bsd ring", + .exec_id = I915_EXEC_BSD, + .guc_id = GUC_VIDEO_ENGINE, + .mmio_base = GEN6_BSD_RING_BASE, + .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VCS2] = { + .name = "bsd2 ring", + .exec_id = I915_EXEC_BSD, + .guc_id = GUC_VIDEO_ENGINE2, + .mmio_base = GEN8_BSD2_RING_BASE, + .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd2_ring_buffer, + }, + [VECS] = { + .name = "video enhancement ring", + .exec_id = I915_EXEC_VEBOX, + .guc_id = GUC_VIDEOENHANCE_ENGINE, + .mmio_base = VEBOX_RING_BASE, + .irq_shift = GEN8_VECS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +static struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) +{ + const struct engine_info *info = &intel_engines[id]; + struct intel_engine_cs *engine = &dev_priv->engine[id]; + + engine->id = id; + engine->i915 = dev_priv; + engine->name = info->name; + engine->exec_id = info->exec_id; + engine->hw_id = engine->guc_id = info->guc_id; + engine->mmio_base = info->mmio_base; + engine->irq_shift = info->irq_shift; + + return engine; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + unsigned int mask = 0; + int (*init)(struct intel_engine_cs *engine); + unsigned int i; + int ret; + + WARN_ON(INTEL_INFO(dev_priv)->ring_mask == 0); + WARN_ON(INTEL_INFO(dev_priv)->ring_mask & + GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + if (!HAS_ENGINE(dev_priv, i)) + continue; + + if (i915.enable_execlists) + init = intel_engines[i].init_execlists; + else + init = intel_engines[i].init_legacy; + + if (!init) + continue; + + ret = init(intel_engine_setup(dev_priv, i)); + if (ret) + goto cleanup; + + mask |= ENGINE_MASK(i); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { + struct intel_device_info *info = + (struct intel_device_info *)&dev_priv->info; + info->ring_mask = mask; + } + + return 0; + +cleanup: + for (i = 0; i < I915_NUM_ENGINES; i++) { + if (i915.enable_execlists) + intel_logical_ring_cleanup(&dev_priv->engine[i]); + else + intel_engine_cleanup(&dev_priv->engine[i]); + } + + return ret; +} + +void intel_engine_init_hangcheck(struct intel_engine_cs *engine) +{ + memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); +} + +static void intel_engine_init_requests(struct intel_engine_cs *engine) +{ + init_request_active(&engine->last_request, NULL); + INIT_LIST_HEAD(&engine->request_list); +} + +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +void intel_engine_setup_common(struct intel_engine_cs *engine) +{ + INIT_LIST_HEAD(&engine->buffers); + INIT_LIST_HEAD(&engine->execlist_queue); + spin_lock_init(&engine->execlist_lock); + + engine->fence_context = fence_context_alloc(1); + + intel_engine_init_requests(engine); + intel_engine_init_hangcheck(engine); + i915_gem_batch_pool_init(engine, &engine->batch_pool); +} + +/** + * intel_engines_init_common - initialize cengine state which might require hw access + * @engine: Engine to initialize. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do require hardware access. + * + * Typcally done at later stages of submission mode specific engine setup. + * + * Returns zero on success or an error code on failure. + */ +int intel_engine_init_common(struct intel_engine_cs *engine) +{ + int ret; + + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + return ret; + + return intel_engine_init_cmd_parser(engine); +} + +/** + * intel_engines_cleanup_common - cleans up the engine state created by + * the common initiailizers. + * @engine: Engine to cleanup. + * + * This cleans up everything created by the common helpers. + */ +void intel_engine_cleanup_common(struct intel_engine_cs *engine) +{ + intel_engine_cleanup_cmd_parser(engine); + intel_engine_fini_breadcrumbs(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); +} diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 3f4e32f8baae..e67b09a3328c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -741,7 +741,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; cache->fb.fence_reg = obj->fence_reg; - cache->fb.tiling_mode = obj->tiling_mode; + cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -1075,6 +1075,8 @@ out: /** * intel_fbc_enable: tries to enable FBC on the CRTC * @crtc: the CRTC + * @crtc_state: corresponding &drm_crtc_state for @crtc + * @plane_state: corresponding &drm_plane_state for the primary plane of @crtc * * This function checks if the given CRTC was chosen for FBC, then enables it if * possible. Notice that it doesn't activate FBC. It is valid to call @@ -1163,11 +1165,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) return; mutex_lock(&fbc->lock); - if (fbc->crtc == crtc) { - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); + if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); - } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); @@ -1230,12 +1229,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } +static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } +#endif + + return false; +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -1253,6 +1269,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 69d7ea576baa..2c14dfc5e4f0 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -41,6 +41,7 @@ #include <drm/drm_crtc.h> #include <drm/drm_fb_helper.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -158,7 +159,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, fb = __intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); ret = PTR_ERR(fb); goto out; } @@ -188,7 +189,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct i915_vma *vma; struct drm_i915_gem_object *obj; bool prealloc = false; - void *vaddr; + void __iomem *vaddr; int ret; if (intel_fb && @@ -767,7 +768,7 @@ void intel_fbdev_fini(struct drm_device *dev) if (!ifbdev) return; - flush_work(&dev_priv->fbdev_suspend_work); + cancel_work_sync(&dev_priv->fbdev_suspend_work); if (!current_is_async()) intel_fbdev_sync(ifbdev); diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index ac85357010b4..966de4c7c7a2 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -63,47 +63,30 @@ #include <drm/drmP.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "i915_drv.h" -/** - * intel_fb_obj_invalidate - invalidate frontbuffer object - * @obj: GEM object to invalidate - * @origin: which operation caused the invalidation - * - * This function gets called every time rendering on the given object starts and - * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must - * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed - * until the rendering completes or a flip on this frontbuffer plane is - * scheduled. - */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - if (!obj->frontbuffer_bits) - return; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (origin == ORIGIN_CS) { - mutex_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.busy_bits - |= obj->frontbuffer_bits; - dev_priv->fb_tracking.flip_bits - &= ~obj->frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); + dev_priv->fb_tracking.busy_bits |= frontbuffer_bits; + dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&dev_priv->fb_tracking.lock); } - intel_psr_invalidate(dev, obj->frontbuffer_bits); - intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits); - intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin); + intel_psr_invalidate(dev_priv, frontbuffer_bits); + intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); + intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flush - flush frontbuffer - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -113,64 +96,45 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -static void intel_frontbuffer_flush(struct drm_device *dev, +static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* Delay flushing when rings are still busy.*/ - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); if (!frontbuffer_bits) return; - intel_edp_drrs_flush(dev, frontbuffer_bits); - intel_psr_flush(dev, frontbuffer_bits, origin); + intel_edp_drrs_flush(dev_priv, frontbuffer_bits); + intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin); } -/** - * intel_fb_obj_flush - flush frontbuffer object - * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering - * @origin: which operation caused the flush - * - * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. - */ -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned frontbuffer_bits; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - if (!obj->frontbuffer_bits) - return; - - frontbuffer_bits = obj->frontbuffer_bits; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (retire) { - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); } - intel_frontbuffer_flush(dev, frontbuffer_bits, origin); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. The actual @@ -180,23 +144,21 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_device *dev, +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_psr_single_frame_update(dev, frontbuffer_bits); + intel_psr_single_frame_update(dev_priv, frontbuffer_bits); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -204,23 +166,23 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_device *dev, +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Mask any cancelled flips. */ frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev_priv, + frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. This is for @@ -229,15 +191,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, * * Can be called without any locks held. */ -void intel_frontbuffer_flip(struct drm_device *dev, +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h new file mode 100644 index 000000000000..76ceb539f9f0 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2014-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __INTEL_FRONTBUFFER_H__ +#define __INTEL_FRONTBUFFER_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, + unsigned frontbuffer_bits); + +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin, + unsigned int frontbuffer_bits); + +/** + * intel_fb_obj_invalidate - invalidate frontbuffer object + * @obj: GEM object to invalidate + * @origin: which operation caused the invalidation + * + * This function gets called every time rendering on the given object starts and + * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must + * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed + * until the rendering completes or a flip on this frontbuffer plane is + * scheduled. + */ +static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) + return; + + __intel_fb_obj_invalidate(obj, origin, frontbuffer_bits); +} + +/** + * intel_fb_obj_flush - flush frontbuffer object + * @obj: GEM object to flush + * @retire: set when retiring asynchronous rendering + * @origin: which operation caused the flush + * + * This function gets called every time rendering on the given object has + * completed and frontbuffer caching can be started again. If @retire is true + * then any delayed flushes will be unblocked. + */ +static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin) +{ + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) + return; + + __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); +} + +#endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743740c0..623cf26cd784 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev); int i915_guc_submission_init(struct drm_i915_private *dev_priv); int i915_guc_submission_enable(struct drm_i915_private *dev_priv); int i915_guc_wq_check_space(struct drm_i915_gem_request *rq); -int i915_guc_submit(struct drm_i915_gem_request *rq); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c69658d2c..3763e30cc165 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -323,7 +323,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } - ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0); + ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); if (ret) { DRM_DEBUG_DRIVER("pin failed %d\n", ret); return ret; @@ -349,7 +349,9 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) } /* WaC6DisallowByGfxPause*/ - I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); + if (IS_SKL_REVID(dev, 0, SKL_REVID_C0) || + IS_BXT_REVID(dev, 0, BXT_REVID_B0)) + I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); if (IS_BROXTON(dev)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); @@ -662,7 +664,7 @@ fail: mutex_lock(&dev->struct_mutex); obj = guc_fw->guc_fw_obj; if (obj) - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); @@ -743,7 +745,7 @@ void intel_guc_fini(struct drm_device *dev) i915_guc_submission_fini(dev_priv); if (guc_fw->guc_fw_obj) - drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); + i915_gem_object_put(guc_fw->guc_fw_obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index f48957ea100d..5dc2c20f6ca1 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -525,7 +525,6 @@ void i915_hpd_poll_init_work(struct work_struct *work) { /** * intel_hpd_poll_init - enables/disables polling for connectors with hpd * @dev_priv: i915 device instance - * @enabled: Whether to enable or disable polling * * This function enables polling for all connectors, regardless of whether or * not they support hotplug detection. Under certain conditions HPD may not be diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 414ddda43922..309c5d9b1c57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -288,7 +288,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context - * * @ctx: Context to work on * @engine: Engine the descriptor will be used with * @@ -297,12 +296,13 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * expensive to calculate, we'll just do it once and cache the result, * which remains valid until the context is unpinned. * - * This is what a descriptor looks like, from LSB to MSB: - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -373,7 +373,7 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state; - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -384,8 +384,8 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) execlists_update_context_pdps(ppgtt, reg_state); } -static void execlists_submit_requests(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0, + struct drm_i915_gem_request *rq1) { struct drm_i915_private *dev_priv = rq0->i915; unsigned int fw_domains = rq0->engine->fw_domains; @@ -418,7 +418,7 @@ static inline void execlists_context_status_change( atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); } -static void execlists_context_unqueue(struct intel_engine_cs *engine) +static void execlists_unqueue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; struct drm_i915_gem_request *cursor, *tmp; @@ -441,7 +441,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) * will update tail past first request's workload */ cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); - i915_gem_request_unreference(req0); + i915_gem_request_put(req0); req0 = cursor; } else { if (IS_ENABLED(CONFIG_DRM_I915_GVT)) { @@ -482,14 +482,11 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) * resubmit the request. See gen8_emit_request() for where we * prepare the padding after the end of the request. */ - struct intel_ringbuffer *ringbuf; - - ringbuf = req0->ctx->engine[engine->id].ringbuf; req0->tail += 8; - req0->tail &= ringbuf->size - 1; + req0->tail &= req0->ring->size - 1; } - execlists_submit_requests(req0, req1); + execlists_elsp_submit_contexts(req0, req1); } static unsigned int @@ -514,7 +511,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id) execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT); list_del(&head_req->execlist_link); - i915_gem_request_unreference(head_req); + i915_gem_request_put(head_req); return 1; } @@ -539,10 +536,7 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer, return status; } -/** - * intel_lrc_irq_handler() - handle Context Switch interrupts - * @data: tasklet handler passed in unsigned long - * +/* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ @@ -603,7 +597,7 @@ static void intel_lrc_irq_handler(unsigned long data) if (submit_contexts) { if (!engine->disable_lite_restore_wa || (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE)) - execlists_context_unqueue(engine); + execlists_unqueue(engine); } spin_unlock(&engine->execlist_lock); @@ -612,7 +606,7 @@ static void intel_lrc_irq_handler(unsigned long data) DRM_ERROR("More than two context complete events?\n"); } -static void execlists_context_queue(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct drm_i915_gem_request *cursor; @@ -635,70 +629,19 @@ static void execlists_context_queue(struct drm_i915_gem_request *request) WARN(tail_req->elsp_submitted != 0, "More than 2 already-submitted reqs queued\n"); list_del(&tail_req->execlist_link); - i915_gem_request_unreference(tail_req); + i915_gem_request_put(tail_req); } } - i915_gem_request_reference(request); + i915_gem_request_get(request); list_add_tail(&request->execlist_link, &engine->execlist_queue); request->ctx_hw_id = request->ctx->hw_id; if (num_elements == 0) - execlists_context_unqueue(engine); + execlists_unqueue(engine); spin_unlock_bh(&engine->execlist_lock); } -static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - -static int execlists_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) -{ - const unsigned other_rings = ~intel_engine_flag(req->engine); - struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; - int ret; - - list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_object *obj = vma->obj; - - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); - if (ret) - return ret; - } - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; - } - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); - - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return logical_ring_invalidate_all_caches(req); -} - int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -717,7 +660,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - request->ringbuf = ce->ringbuf; + request->ring = ce->ring; if (i915.enable_guc_submission) { /* @@ -762,7 +705,7 @@ err_unpin: } /* - * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * intel_logical_ring_advance() - advance the tail and prepare for submission * @request: Request to advance the logical ringbuffer of. * * The tail is updated in our logical ringbuffer struct, not in the actual context. What @@ -771,13 +714,13 @@ err_unpin: * point, the tail *inside* the context is updated and the ELSP written to. */ static int -intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) +intel_logical_ring_advance(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - intel_logical_ring_advance(ringbuf); - request->tail = ringbuf->tail; + intel_ring_advance(ring); + request->tail = ring->tail; /* * Here we add two extra NOOPs as padding to avoid @@ -785,9 +728,9 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) * * Caller must reserve WA_TAIL_DWORDS for us! */ - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* We keep the previous context alive until we retire the following * request. This ensures that any the context object is still pinned @@ -797,100 +740,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) */ request->previous_context = engine->last_context; engine->last_context = request->ctx; - - if (i915.enable_guc_submission) - i915_guc_submit(request); - else - execlists_context_queue(request); - - return 0; -} - -/** - * execlists_submission() - submit a batchbuffer for execution, Execlists style - * @params: execbuffer call parameters. - * @args: execbuffer call arguments. - * @vmas: list of vmas. - * - * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts - * away the submission details of the execbuffer ioctl call. - * - * Return: non-zero if the submission fails. - */ -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) -{ - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf = params->ctx->engine[engine->id].ringbuf; - u64 exec_start; - int instp_mode; - u32 instp_mask; - int ret; - - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); - return -EINVAL; - } - - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - DRM_DEBUG("sol reset is gen7 only\n"); - return -EINVAL; - } - - ret = execlists_move_to_gpu(params->request, vmas); - if (ret) - return ret; - - if (engine == &dev_priv->engine[RCS] && - instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_logical_ring_emit_reg(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); - intel_logical_ring_advance(ringbuf); - - dev_priv->relative_constants_mode = instp_mode; - } - - exec_start = params->batch_obj_vm_offset + - args->batch_start_offset; - - ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags); - if (ret) - return ret; - - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - - i915_gem_execbuffer_move_to_active(vmas, params->request); - return 0; } @@ -907,51 +756,10 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) { list_del(&req->execlist_link); - i915_gem_request_unreference(req); + i915_gem_request_put(req); } } -void intel_logical_ring_stop(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - /* TODO: Is this correct with Execlists enabled? */ - I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(dev_priv, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, MODE_IDLE, - 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", engine->name); - return; - } - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); -} - -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -966,8 +774,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_gem_object_ggtt_pin(ce->state, NULL, + 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (ret) goto err; @@ -979,15 +788,14 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf); + ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; - i915_gem_context_reference(ctx); ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ringbuf->vma->node.start; + lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; ce->lrc_reg_state = lrc_reg_state; ce->state->dirty = true; @@ -995,6 +803,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (i915.enable_guc_submission) I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + i915_gem_context_get(ctx); return 0; unpin_map: @@ -1017,7 +826,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ringbuffer_obj(ce->ringbuf); + intel_ring_unpin(ce->ring); i915_gem_object_unpin_map(ce->state); i915_gem_object_ggtt_unpin(ce->state); @@ -1026,21 +835,19 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, ce->lrc_desc = 0; ce->lrc_reg_state = NULL; - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; - struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1048,17 +855,16 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr); - intel_logical_ring_emit(ringbuf, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1094,7 +900,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * code duplication. */ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *const batch, + uint32_t *batch, uint32_t index) { struct drm_i915_private *dev_priv = engine->i915; @@ -1156,37 +962,24 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, return 0; } -/** - * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. This is updated - * with the offset value received as input. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of the batch, it should be - * cache-aligned otherwise it is adjusted accordingly. - * Typically we only have one indirect_ctx and per_ctx batch buffer which are - * initialized at the beginning and shared across all contexts but this field - * helps us to have multiple batches at different offsets and select them based - * on a criteria. At the moment this batch always start at the beginning of the page - * and at this point we don't have multiple wa_ctx batch buffers. - * - * The number of WA applied are not known at the beginning; we use this field - * to return the no of DWORDS written. +/* + * Typically we only have one indirect_ctx and per_ctx batch buffer which are + * initialized at the beginning and shared across all contexts but this field + * helps us to have multiple batches at different offsets and select them based + * on a criteria. At the moment this batch always start at the beginning of the page + * and at this point we don't have multiple wa_ctx batch buffers. * - * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END - * so it adds NOOPs as padding to make it cacheline aligned. - * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together - * makes a complete batch buffer. + * The number of WA applied are not known at the beginning; we use this field + * to return the no of DWORDS written. * - * Return: non-zero if we exceed the PAGE_SIZE limit. + * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END + * so it adds NOOPs as padding to make it cacheline aligned. + * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together + * makes a complete batch buffer. */ - static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t scratch_addr; @@ -1230,26 +1023,18 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); } -/** - * gen8_init_perctx_bb() - initialize per ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of this batch. - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. +/* + * This batch is started immediately after indirect_ctx batch. Since we ensure + * that indirect_ctx ends on a cacheline this batch is aligned automatically. * - * The number of DWORDS written are returned using this field. + * The number of DWORDS written are returned using this field. * * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1264,7 +1049,7 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { int ret; @@ -1282,6 +1067,13 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, return ret; index = ret; + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); + wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); + wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); + wa_ctx_emit(batch, index, MI_NOOP); + /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { @@ -1332,7 +1124,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, static int gen9_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1389,11 +1181,12 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) return ret; } - ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0); + ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL, + 0, PAGE_SIZE, 0); if (ret) { DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", ret); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); + i915_gem_object_put(engine->wa_ctx.obj); return ret; } @@ -1404,7 +1197,7 @@ static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { if (engine->wa_ctx.obj) { i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); + i915_gem_object_put(engine->wa_ctx.obj); engine->wa_ctx.obj = NULL; } } @@ -1572,8 +1365,8 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1581,28 +1374,27 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_UDW(engine, i)); - intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr)); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_LDW(engine, i)); - intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); + intel_ring_emit(ring, upper_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); + intel_ring_emit(ring, lower_32_bits(pd_daddr)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags) + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1629,14 +1421,14 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); - intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1655,14 +1447,10 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 unused) +static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - struct drm_i915_private *dev_priv = request->i915; - uint32_t cmd; + struct intel_ring *ring = request->ring; + u32 cmd; int ret; ret = intel_ring_begin(request, 4); @@ -1678,29 +1466,28 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, */ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - if (invalidate_domains & I915_GEM_GPU_DOMAINS) { + if (mode & EMIT_INVALIDATE) { cmd |= MI_INVALIDATE_TLB; - if (engine == &dev_priv->engine[VCS]) + if (request->engine->id == VCS) cmd |= MI_INVALIDATE_BSD; } - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); /* upper addr */ - intel_logical_ring_emit(ringbuf, 0); /* value */ - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + intel_ring_advance(ring); return 0; } static int gen8_emit_flush_render(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains) + u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; + struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = request->engine; u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; @@ -1709,14 +1496,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -1751,40 +1538,40 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return ret; if (vf_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, flags); - intel_logical_ring_emit(ringbuf, scratch_addr); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); return 0; } @@ -1813,7 +1600,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) static int gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1823,21 +1610,20 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - intel_logical_ring_emit(ringbuf, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine) | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->seqno); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); + intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + intel_hws_seqno_address(request->engine) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, request->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); @@ -1851,50 +1637,19 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, - (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(request)); /* We're thrashing one dword of HWS. */ - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); -} - -static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); - if (ret) - return ret; - - if (so.rodata == NULL) - return 0; - - ret = req->engine->emit_bb_start(req, so.ggtt_offset, - I915_DISPATCH_SECURE); - if (ret) - goto out; - - ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + so.aux_batch_offset), - I915_DISPATCH_SECURE); - if (ret) - goto out; - - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); - return ret; + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_init_rcs_context(struct drm_i915_gem_request *req) @@ -1913,14 +1668,12 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return intel_lr_context_render_state_init(req); + return i915_gem_render_state_init(req); } /** * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * * @engine: Engine Command Streamer. - * */ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) { @@ -1939,17 +1692,13 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_logical_ring_stop(engine); WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); } if (engine->cleanup) engine->cleanup(engine); - i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); if (engine->status_page.obj) { i915_gem_object_unpin_map(engine->status_page.obj); @@ -1965,13 +1714,23 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) engine->i915 = NULL; } +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) + engine->submit_request = execlists_submit_request; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ engine->init_hw = gen8_init_common_ring; - engine->emit_request = gen8_emit_request; engine->emit_flush = gen8_emit_flush; + engine->emit_request = gen8_emit_request; + engine->submit_request = execlists_submit_request; + engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; @@ -1980,8 +1739,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) +logical_ring_default_irqs(struct intel_engine_cs *engine) { + unsigned shift = engine->irq_shift; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } @@ -2004,17 +1764,46 @@ lrc_setup_hws(struct intel_engine_cs *engine, return 0; } +static void +logical_ring_setup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum forcewake_domains fw_domains; + + intel_engine_setup_common(engine); + + /* Intentionally left blank. */ + engine->buffer = NULL; + + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, + RING_ELSP(engine), + FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_PTR(engine), + FW_REG_READ | FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_BUF_BASE(engine), + FW_REG_READ); + + engine->fw_domains = fw_domains; + + tasklet_init(&engine->irq_tasklet, + intel_lrc_irq_handler, (unsigned long)engine); + + logical_ring_init_platform_invariants(engine); + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); +} + static int logical_ring_init(struct intel_engine_cs *engine) { struct i915_gem_context *dctx = engine->i915->kernel_context; int ret; - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto error; - - ret = i915_cmd_parser_init_ring(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2044,11 +1833,13 @@ error: return ret; } -static int logical_render_ring_init(struct intel_engine_cs *engine) +int logical_render_ring_init(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; + logical_ring_setup(engine); + if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; @@ -2085,160 +1876,11 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static const struct logical_ring_info { - const char *name; - unsigned exec_id; - unsigned guc_id; - u32 mmio_base; - unsigned irq_shift; - int (*init)(struct intel_engine_cs *engine); -} logical_rings[] = { - [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, - .guc_id = GUC_RENDER_ENGINE, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init = logical_render_ring_init, - }, - [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, - .guc_id = GUC_BLITTER_ENGINE, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE2, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, - .guc_id = GUC_VIDEOENHANCE_ENGINE, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init = logical_ring_init, - }, -}; - -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) +int logical_xcs_ring_init(struct intel_engine_cs *engine) { - const struct logical_ring_info *info = &logical_rings[id]; - struct intel_engine_cs *engine = &dev_priv->engine[id]; - enum forcewake_domains fw_domains; + logical_ring_setup(engine); - engine->id = id; - engine->name = info->name; - engine->exec_id = info->exec_id; - engine->guc_id = info->guc_id; - engine->mmio_base = info->mmio_base; - - engine->i915 = dev_priv; - - /* Intentionally left blank. */ - engine->buffer = NULL; - - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->fw_domains = fw_domains; - - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->buffers); - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); - - tasklet_init(&engine->irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); - - logical_ring_init_platform_invariants(engine); - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine, info->irq_shift); - - intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); - - return engine; -} - -/** - * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers - * @dev: DRM device. - * - * This function inits the engines for an Execlists submission style (the - * equivalent in the legacy ringbuffer submission world would be - * i915_gem_init_engines). It does it only for those engines that are present in - * the hardware. - * - * Return: non-zero if the initialization failed. - */ -int intel_logical_rings_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int mask = 0; - unsigned int i; - int ret; - - WARN_ON(INTEL_INFO(dev_priv)->ring_mask & - GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - - for (i = 0; i < ARRAY_SIZE(logical_rings); i++) { - if (!HAS_ENGINE(dev_priv, i)) - continue; - - if (!logical_rings[i].init) - continue; - - ret = logical_rings[i].init(logical_ring_setup(dev_priv, i)); - if (ret) - goto cleanup; - - mask |= ENGINE_MASK(i); - } - - /* - * Catch failures to update logical_rings table when the new engines - * are added to the driver by a warning and disabling the forgotten - * engines. - */ - if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { - struct intel_device_info *info = - (struct intel_device_info *)&dev_priv->info; - info->ring_mask = mask; - } - - return 0; - -cleanup: - for (i = 0; i < I915_NUM_ENGINES; i++) - intel_logical_ring_cleanup(&dev_priv->engine[i]); - - return ret; + return logical_ring_init(engine); } static u32 @@ -2309,7 +1951,7 @@ static int populate_lr_context(struct i915_gem_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_engine_cs *engine, - struct intel_ringbuffer *ringbuf) + struct intel_ring *ring) { struct drm_i915_private *dev_priv = ctx->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; @@ -2362,7 +2004,7 @@ populate_lr_context(struct i915_gem_context *ctx, RING_START(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(engine->mmio_base), - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, @@ -2484,26 +2126,13 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine) return ret; } -/** - * execlists_context_deferred_alloc() - create the LRC specific bits of a context - * @ctx: LR context to create. - * @engine: engine to be used with the context. - * - * This function can be called more than once, with different engines, if we plan - * to use the context with them. The context backing objects and the ringbuffers - * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why - * the creation is a deferred call: it's better to make sure first that we need to use - * a given ring with the context. - * - * Return: non-zero on error. - */ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; uint32_t context_size; - struct intel_ringbuffer *ringbuf; + struct intel_ring *ring; int ret; WARN_ON(ce->state); @@ -2519,29 +2148,29 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + ring = intel_engine_create_ring(engine, ctx->ring_size); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error_deref_obj; } - ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf); + ret = populate_lr_context(ctx, ctx_obj, engine, ring); if (ret) { DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - goto error_ringbuf; + goto error_ring_free; } - ce->ringbuf = ringbuf; + ce->ring = ring; ce->state = ctx_obj; ce->initialised = engine->init_context == NULL; return 0; -error_ringbuf: - intel_ringbuffer_free(ringbuf); +error_ring_free: + intel_ring_free(ring); error_deref_obj: - drm_gem_object_unreference(&ctx_obj->base); - ce->ringbuf = NULL; + i915_gem_object_put(ctx_obj); + ce->ring = NULL; ce->state = NULL; return ret; } @@ -2572,7 +2201,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, i915_gem_object_unpin_map(ctx_obj); - ce->ringbuf->head = 0; - ce->ringbuf->tail = 0; + ce->ring->head = 0; + ce->ring->tail = 0; } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 2b8255c19dcc..a52cf57dbd40 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,17 +29,17 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 /* Execlists regs */ -#define RING_ELSP(ring) _MMIO((ring)->mmio_base + 0x230) -#define RING_EXECLIST_STATUS_LO(ring) _MMIO((ring)->mmio_base + 0x234) -#define RING_EXECLIST_STATUS_HI(ring) _MMIO((ring)->mmio_base + 0x234 + 4) -#define RING_CONTEXT_CONTROL(ring) _MMIO((ring)->mmio_base + 0x244) +#define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) +#define RING_EXECLIST_STATUS_LO(engine) _MMIO((engine)->mmio_base + 0x234) +#define RING_EXECLIST_STATUS_HI(engine) _MMIO((engine)->mmio_base + 0x234 + 4) +#define RING_CONTEXT_CONTROL(engine) _MMIO((engine)->mmio_base + 0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) -#define RING_CONTEXT_STATUS_BUF_BASE(ring) _MMIO((ring)->mmio_base + 0x370) -#define RING_CONTEXT_STATUS_BUF_LO(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8) -#define RING_CONTEXT_STATUS_BUF_HI(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4) -#define RING_CONTEXT_STATUS_PTR(ring) _MMIO((ring)->mmio_base + 0x3a0) +#define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370) +#define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) +#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) +#define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer @@ -67,35 +67,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); -int intel_logical_rings_init(struct drm_device *dev); +int logical_render_ring_init(struct intel_engine_cs *engine); +int logical_xcs_ring_init(struct intel_engine_cs *engine); -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); -/** - * intel_logical_ring_advance() - advance the ringbuffer tail - * @ringbuf: Ringbuffer to advance. - * - * The tail is only updated in our logical ringbuffer struct. - */ -static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) -{ - ringbuf->tail &= ringbuf->size - 1; -} -/** - * intel_logical_ring_emit() - write a DWORD to the ringbuffer. - * @ringbuf: Ringbuffer to write to. - * @data: DWORD to write. - */ -static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, - u32 data) -{ - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; -} -static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf, - i915_reg_t reg) -{ - intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg)); -} +int intel_engines_init(struct drm_device *dev); /* Logical Ring Contexts */ @@ -120,10 +95,7 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); -struct i915_execbuffer_params; -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); void intel_execlists_cancel_requests(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f5b284..80bb9247ce66 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x00000009 */ + [I915_MOCS_UNCACHED] = { + /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0000003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x00000039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -203,9 +204,9 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, return result; } -static i915_reg_t mocs_register(enum intel_engine_id ring, int index) +static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) { - switch (ring) { + switch (engine_id) { case RCS: return GEN9_GFX_MOCS(index); case VCS: @@ -217,7 +218,7 @@ static i915_reg_t mocs_register(enum intel_engine_id ring, int index) case VCS2: return GEN9_MFX1_MOCS(index); default: - MISSING_CASE(ring); + MISSING_CASE(engine_id); return INVALID_MMIO_REG; } } @@ -275,7 +276,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; int ret; @@ -287,14 +288,11 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); for (index = 0; index < table->size; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[index].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[index].control_value); } /* @@ -306,14 +304,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[0].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[0].control_value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -340,7 +336,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; unsigned int i; int ret; @@ -351,19 +347,18 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); for (i = 0; i < table->size/2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, - l3cc_combine(table, 2*i, 2*i+1)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); i++; } @@ -373,12 +368,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 0, 0)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index 4640299e04ec..a8bd9f7bfece 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -54,6 +54,6 @@ int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); void intel_mocs_init_l3cc_table(struct drm_device *dev); -int intel_mocs_init_engine(struct intel_engine_cs *ring); +int intel_mocs_init_engine(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 3212d8806b5a..90f3ab424e01 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -30,6 +30,7 @@ #include "i915_drv.h" #include "i915_reg.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" /* Limits for overlay size. According to intel doc, the real limits are: * Y width: 4095, UV width (planar): 2047, Y height: 2047, @@ -183,8 +184,7 @@ struct intel_overlay { u32 flip_addr; struct drm_i915_gem_object *reg_bo; /* flip handling */ - struct drm_i915_gem_request *last_flip_req; - void (*flip_tail)(struct intel_overlay *); + struct i915_gem_active last_flip; }; static struct overlay_registers __iomem * @@ -210,37 +210,46 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, io_mapping_unmap(regs); } -static int intel_overlay_do_wait_request(struct intel_overlay *overlay, +static void intel_overlay_submit_request(struct intel_overlay *overlay, struct drm_i915_gem_request *req, - void (*tail)(struct intel_overlay *)) + i915_gem_retire_fn retire) { - int ret; - - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); + GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, + &overlay->i915->drm.struct_mutex)); + overlay->last_flip.retire = retire; + i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); +} - overlay->flip_tail = tail; - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; +static int intel_overlay_do_wait_request(struct intel_overlay *overlay, + struct drm_i915_gem_request *req, + i915_gem_retire_fn retire) +{ + intel_overlay_submit_request(overlay, req, retire); + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); +} - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; +static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +{ + struct drm_i915_private *dev_priv = overlay->i915; + struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + + return i915_gem_request_alloc(engine, dev_priv->kernel_context); } /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; int ret; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -252,11 +261,12 @@ static int intel_overlay_on(struct intel_overlay *overlay) overlay->active = true; - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + ring = req->ring; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); + intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -266,8 +276,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; u32 tmp; int ret; @@ -282,7 +292,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -292,29 +302,37 @@ static int intel_overlay_continue(struct intel_overlay *overlay, return ret; } - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, flip_addr); - intel_ring_advance(engine); + ring = req->ring; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_advance(ring); - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); - i915_add_request(req); + intel_overlay_submit_request(overlay, req, NULL); return 0; } -static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) +static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); struct drm_i915_gem_object *obj = overlay->old_vid_bo; + i915_gem_track_fb(obj, NULL, + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); overlay->old_vid_bo = NULL; } -static void intel_overlay_off_tail(struct intel_overlay *overlay) +static void intel_overlay_off_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); struct drm_i915_gem_object *obj = overlay->vid_bo; /* never have the overlay hw on without showing a frame */ @@ -322,7 +340,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) return; i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); overlay->vid_bo = NULL; overlay->crtc->overlay = NULL; @@ -334,8 +352,8 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; int ret; @@ -347,7 +365,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. Do it in both cases */ flip_addr |= OFC_UPDATE; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -357,46 +375,36 @@ static int intel_overlay_off(struct intel_overlay *overlay) return ret; } + ring = req->ring; /* wait for overlay to go idle */ - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); /* turn overlay off */ if (IS_I830(dev_priv)) { /* Workaround: Don't disable the overlay fully, since otherwise * it dies on the next OVERLAY_ON cmd. */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); } else { - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); } - intel_ring_advance(engine); + intel_ring_advance(ring); - return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); + return intel_overlay_do_wait_request(overlay, req, + intel_overlay_off_tail); } /* recover from an interruption due to a signal * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - int ret; - - if (overlay->last_flip_req == NULL) - return 0; - - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; - - if (overlay->flip_tail) - overlay->flip_tail(overlay); - - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } /* Wait for pending overlay flip and release old frame. @@ -406,7 +414,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -420,8 +427,9 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; + struct intel_ring *ring; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -431,22 +439,19 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) return ret; } - intel_ring_emit(engine, + ring = req->ring; + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); if (ret) return ret; - } - - intel_overlay_release_old_vid_tail(overlay); + } else + intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL); - - i915_gem_track_fb(overlay->old_vid_bo, NULL, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); return 0; } @@ -459,7 +464,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) intel_overlay_release_old_vid(overlay); - overlay->last_flip_req = NULL; overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; @@ -836,8 +840,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, overlay->old_vid_bo = overlay->vid_bo; overlay->vid_bo = new_bo; - intel_frontbuffer_flip(&dev_priv->drm, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); return 0; @@ -870,12 +873,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) iowrite32(0, ®s->OCMD); intel_overlay_unmap_regs(overlay, regs); - ret = intel_overlay_off(overlay); - if (ret != 0) - return ret; - - intel_overlay_off_tail(overlay); - return 0; + return intel_overlay_off(overlay); } static int check_overlay_possible_on_crtc(struct intel_overlay *overlay, @@ -1122,9 +1120,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, } crtc = to_intel_crtc(drmmode_crtc); - new_bo = to_intel_bo(drm_gem_object_lookup(file_priv, - put_image_rec->bo_handle)); - if (&new_bo->base == NULL) { + new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle); + if (!new_bo) { ret = -ENOENT; goto out_free; } @@ -1132,7 +1129,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); mutex_lock(&dev->struct_mutex); - if (new_bo->tiling_mode) { + if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; @@ -1220,7 +1217,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - drm_gem_object_unreference_unlocked(&new_bo->base); + i915_gem_object_put_unlocked(new_bo); out_free: kfree(params); @@ -1404,7 +1401,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) } overlay->flip_addr = reg_bo->phys_handle->busaddr; } else { - ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(reg_bo, NULL, + 0, PAGE_SIZE, PIN_MAPPABLE); if (ret) { DRM_ERROR("failed to pin overlay register bo\n"); goto out_free_bo; @@ -1444,7 +1442,7 @@ out_unpin_bo: if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) i915_gem_object_ggtt_unpin(reg_bo); out_free_bo: - drm_gem_object_unreference(®_bo->base); + i915_gem_object_put(reg_bo); out_free: mutex_unlock(&dev_priv->drm.struct_mutex); kfree(overlay); @@ -1461,7 +1459,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv) * hardware should be off already */ WARN_ON(dev_priv->overlay->active); - drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base); + i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo); kfree(dev_priv->overlay); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ba062d70a548..81ab11934d85 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -340,6 +340,11 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) I915_WRITE(FW_BLC_SELF, val); POSTING_READ(FW_BLC_SELF); } else if (IS_I915GM(dev)) { + /* + * FIXME can't find a bit like this for 915G, and + * and yet it does have the related watermark in + * FW_BLC_SELF. What's going on? + */ val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) : _MASKED_BIT_DISABLE(INSTPM_SELF_EN); I915_WRITE(INSTPM, val); @@ -1580,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) obj = intel_fb_obj(enabled->primary->state->fb); /* self-refresh seems busted with untiled */ - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) enabled = NULL; } @@ -1604,6 +1609,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; + if (IS_I915GM(dev) || IS_I945GM(dev)) + cpp = 4; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ @@ -1618,7 +1626,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) if (IS_I945G(dev) || IS_I945GM(dev)) I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); - else if (IS_I915GM(dev)) + else I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } @@ -3344,6 +3352,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, plane_bytes_per_line *= 4; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line /= 4; + } else if (tiling == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; } else { plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); } @@ -4912,7 +4922,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, */ if (!(dev_priv->gt.awake && dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; /* Force a RPS boost (and don't count it against the client) if @@ -5103,35 +5113,31 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { - uint32_t rp_state_cap; - u32 ddcc_status = 0; - int ret; - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_BROXTON(dev_priv)) { - rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; } else { - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; } - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - ret = sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status); - if (0 == ret) + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status) == 0) dev_priv->rps.efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), @@ -5141,29 +5147,26 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is - the natural hardware unit for SKL */ + * the natural hardware unit for SKL + */ dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; } +} - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +static void reset_rps(struct drm_i915_private *dev_priv, + void (*set)(struct drm_i915_private *, u8)) +{ + u8 freq = dev_priv->rps.cur_freq; - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + /* force a reset */ + dev_priv->rps.power = -1; + dev_priv->rps.cur_freq = -1; - if (dev_priv->rps.min_freq_softlimit == 0) { - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = - max_t(int, dev_priv->rps.efficient_freq, - intel_freq_opcode(dev_priv, 450)); - else - dev_priv->rps.min_freq_softlimit = - dev_priv->rps.min_freq; - } + set(dev_priv, freq); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ @@ -5171,8 +5174,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - gen6_init_rps_frequencies(dev_priv); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { /* @@ -5202,8 +5203,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Leaning on the below call to gen6_set_rps to program/setup the * Up/Down EI & threshold registers, as well as the RP_CONTROL, * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5290,9 +5290,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 2a: Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* 2b: Program RC6 thresholds.*/ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ @@ -5349,8 +5346,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 6: Ring frequency + overclocking (our driver does this later */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5358,7 +5354,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) static void gen6_enable_rps(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 rc6vids, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int ret; @@ -5382,9 +5378,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -5435,16 +5428,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) if (ret) DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } - - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -5463,7 +5447,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) +static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { int min_freq = 15; unsigned int gpu_freq; @@ -5547,16 +5531,6 @@ static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) } } -void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - if (!HAS_CORE_RING_FREQ(dev_priv)) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev_priv); - mutex_unlock(&dev_priv->rps.hw_lock); -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; @@ -5746,7 +5720,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) if (WARN_ON(!dev_priv->vlv_pctx)) return; - drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); + i915_gem_object_put_unlocked(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } @@ -5769,8 +5743,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); switch ((val >> 6) & 3) { case 0: @@ -5806,17 +5778,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) @@ -5827,8 +5788,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); mutex_unlock(&dev_priv->sb_lock); @@ -5870,17 +5829,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.rp1_freq | dev_priv->rps.min_freq) & 1, "Odd GPU freq values\n"); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -5971,16 +5919,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6060,16 +5999,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6398,19 +6328,11 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - struct drm_i915_private *dev_priv; - struct intel_engine_cs *engine; bool ret = false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); - -out_unlock: + if (i915_mch_dev) + ret = i915_mch_dev->gt.awake; spin_unlock_irq(&mchdev_lock); return ret; @@ -6566,30 +6488,60 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_get(dev_priv); } + mutex_lock(&dev_priv->rps.hw_lock); + + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) valleyview_init_gt_powersave(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_init_rps_frequencies(dev_priv); + + /* Derive initial user preferences/limits from the hardware limits */ + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dev_priv->rps.min_freq_softlimit = + max_t(int, + dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN6(dev_priv) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params = 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, ¶ms); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (dev_priv->rps.max_freq & 0xff) * 50, + (params & 0xff) * 50); + dev_priv->rps.max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) intel_runtime_pm_put(dev_priv); } -static void gen6_suspend_rps(struct drm_i915_private *dev_priv) -{ - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - gen6_disable_rps_interrupts(dev_priv); -} - /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev_priv: i915 device @@ -6603,60 +6555,76 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - gen6_suspend_rps(dev_priv); + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); + /* gen6_rps_idle() will be called later to disable interrupts */ +} + +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) +{ + dev_priv->rps.enabled = true; /* force disabling */ + intel_disable_gt_powersave(dev_priv); + + gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - intel_suspend_gt_powersave(dev_priv); + if (!READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev_priv)->gen >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else - gen6_disable_rps(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + if (INTEL_GEN(dev_priv) >= 9) { + gen9_disable_rc6(dev_priv); + gen9_disable_rps(dev_priv); + } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_disable_drps(dev_priv); } + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } -static void intel_gen6_powersave_work(struct work_struct *work) +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! + */ + if (READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; - gen6_reset_rps_interrupts(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 9) { + } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); - } else { + gen6_update_ring_freq(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); } WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); @@ -6666,18 +6634,47 @@ static void intel_gen6_powersave_work(struct work_struct *work) WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); +} - gen6_enable_rps_interrupts(dev_priv); +static void __intel_autoenable_gt_powersave(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; - mutex_unlock(&dev_priv->rps.hw_lock); + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); +out: intel_runtime_pm_put(dev_priv); } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + if (READ_ONCE(dev_priv->rps.enabled)) return; if (IS_IRONLAKE_M(dev_priv)) { @@ -6698,21 +6695,13 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) * paths, so the _noresume version is enough (and in case of * runtime resume it's necessary). */ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } } -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (INTEL_INFO(dev_priv)->gen < 6) - return; - - gen6_suspend_rps(dev_priv); - dev_priv->rps.enabled = false; -} - static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -7787,7 +7776,7 @@ static void __intel_rps_boost_work(struct work_struct *work) if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); - i915_gem_request_unreference(req); + i915_gem_request_put(req); kfree(boost); } @@ -7805,8 +7794,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) if (boost == NULL) return; - i915_gem_request_reference(req); - boost->req = req; + boost->req = i915_gem_request_get(req); INIT_WORK(&boost->work, __intel_rps_boost_work); queue_work(req->i915->wq, &boost->work); @@ -7819,11 +7807,9 @@ void intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, - intel_gen6_powersave_work); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2b0d1baf15b3..59a21c9d2e43 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -645,9 +645,8 @@ unlock: mutex_unlock(&dev_priv->psr.lock); } -static void intel_psr_exit(struct drm_device *dev) +static void intel_psr_exit(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = dev_priv->psr.enabled; struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; enum pipe pipe = to_intel_crtc(crtc)->pipe; @@ -656,7 +655,7 @@ static void intel_psr_exit(struct drm_device *dev) if (!dev_priv->psr.active) return; - if (HAS_DDI(dev)) { + if (HAS_DDI(dev_priv)) { val = I915_READ(EDP_PSR_CTL); WARN_ON(!(val & EDP_PSR_ENABLE)); @@ -691,7 +690,7 @@ static void intel_psr_exit(struct drm_device *dev) /** * intel_psr_single_frame_update - Single Frame Update - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Some platforms support a single frame update feature that is used to @@ -699,10 +698,9 @@ static void intel_psr_exit(struct drm_device *dev) * So far it is only implemented for Valleyview and Cherryview because * hardware requires this to be done before a page flip. */ -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; u32 val; @@ -711,7 +709,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, * Single frame update is already supported on BDW+ but it requires * many W/A and it isn't really needed. */ - if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -737,7 +735,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, /** * intel_psr_invalidate - Invalidade PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Since the hardware frontbuffer tracking has gaps we need to integrate @@ -747,10 +745,9 @@ void intel_psr_single_frame_update(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." */ -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -767,14 +764,14 @@ void intel_psr_invalidate(struct drm_device *dev, dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits; if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); mutex_unlock(&dev_priv->psr.lock); } /** * intel_psr_flush - Flush PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -785,10 +782,9 @@ void intel_psr_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. */ -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -806,7 +802,7 @@ void intel_psr_flush(struct drm_device *dev, /* By definition flush = invalidate + flush */ if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/intel_renderstate.h index 5bd69852752c..08f6fea05a2c 100644 --- a/drivers/gpu/drm/i915/intel_renderstate.h +++ b/drivers/gpu/drm/i915/intel_renderstate.h @@ -24,12 +24,13 @@ #ifndef _INTEL_RENDERSTATE_H #define _INTEL_RENDERSTATE_H -#include "i915_drv.h" +#include <linux/types.h> -extern const struct intel_renderstate_rodata gen6_null_state; -extern const struct intel_renderstate_rodata gen7_null_state; -extern const struct intel_renderstate_rodata gen8_null_state; -extern const struct intel_renderstate_rodata gen9_null_state; +struct intel_renderstate_rodata { + const u32 *reloc; + const u32 *batch; + const u32 batch_items; +}; #define RO_RENDERSTATE(_g) \ const struct intel_renderstate_rodata gen ## _g ## _null_state = { \ @@ -38,4 +39,9 @@ extern const struct intel_renderstate_rodata gen9_null_state; .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \ } +extern const struct intel_renderstate_rodata gen6_null_state; +extern const struct intel_renderstate_rodata gen7_null_state; +extern const struct intel_renderstate_rodata gen8_null_state; +extern const struct intel_renderstate_rodata gen9_null_state; + #endif /* INTEL_RENDERSTATE_H */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cca7792f26d5..e08a1e1b04e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -47,57 +47,44 @@ int __intel_ring_space(int head, int tail, int size) return space - I915_RING_FREE_SPACE; } -void intel_ring_update_space(struct intel_ringbuffer *ringbuf) +void intel_ring_update_space(struct intel_ring *ring) { - if (ringbuf->last_retired_head != -1) { - ringbuf->head = ringbuf->last_retired_head; - ringbuf->last_retired_head = -1; + if (ring->last_retired_head != -1) { + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; } - ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR, - ringbuf->tail, ringbuf->size); -} - -static void __intel_ring_advance(struct intel_engine_cs *engine) -{ - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; - engine->write_tail(engine, ringbuf->tail); + ring->space = __intel_ring_space(ring->head & HEAD_ADDR, + ring->tail, ring->size); } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; u32 cmd; int ret; cmd = MI_FLUSH; - if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) - cmd |= MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; u32 cmd; int ret; @@ -129,23 +116,20 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, * are flushed at any MI_FLUSH. */ - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(req->i915) || IS_GEN5(req->i915))) - cmd |= MI_INVALIDATE_ISP; + if (IS_G4X(req->i915) || IS_GEN5(req->i915)) + cmd |= MI_INVALIDATE_ISP; + } ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -190,45 +174,46 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); /* low dword */ - intel_ring_emit(engine, 0); /* high dword */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); /* low dword */ + intel_ring_emit(ring, 0); /* high dword */ + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -240,7 +225,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. */ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* @@ -249,7 +234,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, */ flags |= PIPE_CONTROL_CS_STALL; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -266,11 +251,11 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -278,30 +263,31 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* @@ -318,13 +304,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. */ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -350,11 +336,11 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -363,41 +349,40 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 flags = 0; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; int ret; flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -419,14 +404,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, return gen8_emit_pipe_control(req, flags, scratch_addr); } -static void ring_write_tail(struct intel_engine_cs *engine, - u32 value) -{ - struct drm_i915_private *dev_priv = engine->i915; - I915_WRITE_TAIL(engine, value); -} - -u64 intel_ring_get_active_head(struct intel_engine_cs *engine) +u64 intel_engine_get_active_head(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; u64 acthd; @@ -539,7 +517,7 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, 0); I915_WRITE_HEAD(engine, 0); - engine->write_tail(engine, 0); + I915_WRITE_TAIL(engine, 0); if (!IS_GEN2(dev_priv)) { (void)I915_READ_CTL(engine); @@ -549,16 +527,11 @@ static bool stop_ring(struct intel_engine_cs *engine) return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); -} - static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ringbuffer *ringbuf = engine->buffer; - struct drm_i915_gem_object *obj = ringbuf->obj; + struct intel_ring *ring = engine->buffer; + struct drm_i915_gem_object *obj = ring->obj; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -608,7 +581,7 @@ static int init_ring_common(struct intel_engine_cs *engine) (void)I915_READ_HEAD(engine); I915_WRITE_CTL(engine, - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); /* If the head is still not zero, the ring is dead */ @@ -627,10 +600,10 @@ static int init_ring_common(struct intel_engine_cs *engine) goto out; } - ringbuf->last_retired_head = -1; - ringbuf->head = I915_READ_HEAD(engine); - ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR; - intel_ring_update_space(ringbuf); + ring->last_retired_head = -1; + ring->head = I915_READ_HEAD(engine); + ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR; + intel_ring_update_space(ring); intel_engine_init_hangcheck(engine); @@ -646,7 +619,7 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine) return; i915_gem_object_ggtt_unpin(engine->scratch.obj); - drm_gem_object_unreference(&engine->scratch.obj->base); + i915_gem_object_put(engine->scratch.obj); engine->scratch.obj = NULL; } @@ -666,7 +639,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size) goto err; } - ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH); if (ret) goto err_unref; @@ -677,22 +650,21 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size) return 0; err_unref: - drm_gem_object_unreference(&engine->scratch.obj->base); + i915_gem_object_put(engine->scratch.obj); err: return ret; } static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; int ret, i; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -700,17 +672,16 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(engine, w->reg[i].addr); - intel_ring_emit(engine, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1178,8 +1149,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2)); - /* WaInsertDummyPushConstPs:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1222,8 +1193,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_RO_PERF_DIS); - /* WaInsertDummyPushConstPs:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1331,189 +1302,195 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) if (dev_priv->semaphore_obj) { i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); - drm_gem_object_unreference(&dev_priv->semaphore_obj->base); + i915_gem_object_put(dev_priv->semaphore_obj); dev_priv->semaphore_obj = NULL; } intel_fini_pipe_control(engine); } -static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_rcs_signal(struct drm_i915_gem_request *req) { -#define MBOX_UPDATE_DWORDS 8 - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(req, (num_rings-1) * 8); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - intel_ring_emit(signaller, lower_32_bits(gtt_offset)); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); - intel_ring_emit(signaller, 0); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, lower_32_bits(gtt_offset)); + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(ring, 0); } + intel_ring_advance(ring); return 0; } -static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_xcs_signal(struct drm_i915_gem_request *req) { -#define MBOX_UPDATE_DWORDS 6 - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(req, (num_rings-1) * 6); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | - MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(signaller, lower_32_bits(gtt_offset) | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, + (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + lower_32_bits(gtt_offset) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(ring, 0); } + intel_ring_advance(ring); return 0; } -static int gen6_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen6_signal(struct drm_i915_gem_request *req) { - struct intel_engine_cs *signaller = signaller_req->engine; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *useless; enum intel_engine_id id; int ret, num_rings; -#define MBOX_UPDATE_DWORDS 3 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); if (ret) return ret; for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; + i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id]; if (i915_mmio_reg_valid(mbox_reg)) { - intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, mbox_reg); + intel_ring_emit(ring, req->fence.seqno); } } /* If num_dwords was rounded, make sure the tail pointer is correct */ if (num_rings % 2 == 0) - intel_ring_emit(signaller, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; +} + +static void i9xx_submit_request(struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = request->i915; + + I915_WRITE_TAIL(request->engine, + intel_ring_offset(request->ring, request->tail)); +} + +static int i9xx_emit_request(struct drm_i915_gem_request *req) +{ + struct intel_ring *ring = req->ring; + int ret; + + ret = intel_ring_begin(req, 4); + if (ret) + return ret; + + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_advance(ring); + + req->tail = ring->tail; return 0; } /** - * gen6_add_request - Update the semaphore mailbox registers + * gen6_sema_emit_request - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static int -gen6_add_request(struct drm_i915_gem_request *req) +static int gen6_sema_emit_request(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 4); - else - ret = intel_ring_begin(req, 4); - + ret = req->engine->semaphore.signal(req); if (ret) return ret; - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); - - return 0; + return i9xx_emit_request(req); } -static int -gen8_render_add_request(struct drm_i915_gem_request *req) +static int gen8_render_emit_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 8); - else - ret = intel_ring_begin(req, 8); + if (engine->semaphore.signal) { + ret = engine->semaphore.signal(req); + if (ret) + return ret; + } + + ret = intel_ring_begin(req, 8); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(engine, intel_hws_seqno_address(req->engine)); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); /* We're thrashing one dword of HWS. */ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_USER_INTERRUPT); - intel_ring_emit(engine, MI_NOOP); - __intel_ring_advance(engine); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); - return 0; -} + req->tail = ring->tail; -static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, - u32 seqno) -{ - return dev_priv->last_seqno < seqno; + return 0; } /** @@ -1525,82 +1502,71 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, */ static int -gen8_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen8_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_engine_cs *waiter = waiter_req->engine; - struct drm_i915_private *dev_priv = waiter_req->i915; - u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; + u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; int ret; - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, lower_32_bits(offset)); - intel_ring_emit(waiter, upper_32_bits(offset)); - intel_ring_advance(waiter); + intel_ring_emit(ring, + MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD); + intel_ring_emit(ring, signal->fence.seqno); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_advance(ring); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. * We do this on the i915_switch_context() following the wait and * before the dispatch. */ - ppgtt = waiter_req->ctx->ppgtt; - if (ppgtt && waiter_req->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine); + ppgtt = req->ctx->ppgtt; + if (ppgtt && req->engine->id != RCS) + ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); return 0; } static int -gen6_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen6_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_engine_cs *waiter = waiter_req->engine; + struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id]; int ret; - /* Throughout all of the GEM code, seqno passed implies our current - * seqno is >= the last seqno executed. However for hardware the - * comparison is strictly greater than. - */ - seqno -= 1; - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - /* If seqno wrap happened, omit the wait with no-ops */ - if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) { - intel_ring_emit(waiter, dw1 | wait_mbox); - intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, 0); - intel_ring_emit(waiter, MI_NOOP); - } else { - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - } - intel_ring_advance(waiter); + intel_ring_emit(ring, dw1 | wait_mbox); + /* Throughout all of the GEM code, seqno passed implies our current + * seqno is >= the last seqno executed. However for hardware the + * comparison is strictly greater than. + */ + intel_ring_emit(ring, signal->fence.seqno - 1); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static void -gen5_seqno_barrier(struct intel_engine_cs *ring) +gen5_seqno_barrier(struct intel_engine_cs *engine) { /* MI_STORE are internally buffered by the GPU and not flushed * either by MI_FLUSH or SyncFlush or any other combination of @@ -1693,40 +1659,18 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); - return 0; -} - -static int -i9xx_add_request(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); - + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1788,24 +1732,24 @@ gen8_irq_disable(struct intel_engine_cs *engine) } static int -i965_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags) +i965_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } @@ -1815,12 +1759,12 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i830_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - u32 cs_offset = engine->scratch.gtt_offset; + struct intel_ring *ring = req->ring; + u32 cs_offset = req->engine->scratch.gtt_offset; int ret; ret = intel_ring_begin(req, 6); @@ -1828,13 +1772,13 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* Evict the invalid PTE TLBs */ - intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 0xdeadbeef); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); + intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0xdeadbeef); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) @@ -1848,17 +1792,17 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, + intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 4096); - intel_ring_emit(engine, offset); + intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* ... and execute it. */ offset = cs_offset; @@ -1868,30 +1812,30 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } static int -i915_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i915_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } @@ -1917,7 +1861,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine) kunmap(sg_page(obj->pages->sgl)); i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); engine->status_page.obj = NULL; } @@ -1952,10 +1896,10 @@ static int init_status_page(struct intel_engine_cs *engine) * actualy map it). */ flags |= PIN_MAPPABLE; - ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags); if (ret) { err_unref: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ret; } @@ -1989,32 +1933,17 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) -{ - GEM_BUG_ON(ringbuf->vma == NULL); - GEM_BUG_ON(ringbuf->virtual_start == NULL); - - if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) - i915_gem_object_unpin_map(ringbuf->obj); - else - i915_vma_unpin_iomap(ringbuf->vma); - ringbuf->virtual_start = NULL; - - i915_gem_object_ggtt_unpin(ringbuf->obj); - ringbuf->vma = NULL; -} - -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf) +int intel_ring_pin(struct intel_ring *ring) { - struct drm_i915_gem_object *obj = ringbuf->obj; + struct drm_i915_private *dev_priv = ring->engine->i915; + struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ unsigned flags = PIN_OFFSET_BIAS | 4096; void *addr; int ret; if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags); if (ret) return ret; @@ -2028,8 +1957,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, goto err_unpin; } } else { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - flags | PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, + flags | PIN_MAPPABLE); if (ret) return ret; @@ -2040,15 +1969,16 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(dev_priv); - addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); + addr = (void __force *) + i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_unpin; } } - ringbuf->virtual_start = addr; - ringbuf->vma = i915_gem_obj_to_ggtt(obj); + ring->vaddr = addr; + ring->vma = i915_gem_obj_to_ggtt(obj); return 0; err_unpin: @@ -2056,39 +1986,56 @@ err_unpin: return ret; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_ring_unpin(struct intel_ring *ring) { - drm_gem_object_unreference(&ringbuf->obj->base); - ringbuf->obj = NULL; + GEM_BUG_ON(!ring->vma); + GEM_BUG_ON(!ring->vaddr); + + if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) + i915_gem_object_unpin_map(ring->obj); + else + i915_vma_unpin_iomap(ring->vma); + ring->vaddr = NULL; + + i915_gem_object_ggtt_unpin(ring->obj); + ring->vma = NULL; +} + +static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) +{ + i915_gem_object_put(ring->obj); + ring->obj = NULL; } static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) + struct intel_ring *ring) { struct drm_i915_gem_object *obj; obj = NULL; if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ringbuf->size); + obj = i915_gem_object_create_stolen(dev, ring->size); if (obj == NULL) - obj = i915_gem_object_create(dev, ringbuf->size); + obj = i915_gem_object_create(dev, ring->size); if (IS_ERR(obj)) return PTR_ERR(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ringbuf->obj = obj; + ring->obj = obj; return 0; } -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) { - struct intel_ringbuffer *ring; + struct intel_ring *ring; int ret; + GEM_BUG_ON(!is_power_of_2(size)); + ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (ring == NULL) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", @@ -2099,6 +2046,8 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) ring->engine = engine; list_add(&ring->link, &engine->buffers); + INIT_LIST_HEAD(&ring->request_list); + ring->size = size; /* Workaround an erratum on the i830 which causes a hang if * the TAIL pointer points to within the last 2 cachelines @@ -2124,7 +2073,7 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) } void -intel_ringbuffer_free(struct intel_ringbuffer *ring) +intel_ring_free(struct intel_ring *ring) { intel_destroy_ringbuffer_obj(ring); list_del(&ring->link); @@ -2143,7 +2092,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0); + ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0, + ctx->ggtt_alignment, 0); if (ret) goto error; } @@ -2158,7 +2108,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, if (ctx == ctx->i915->kernel_context) ce->initialised = true; - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); return 0; error: @@ -2179,28 +2129,23 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, if (ce->state) i915_gem_object_ggtt_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } -static int intel_init_ring_buffer(struct drm_device *dev, - struct intel_engine_cs *engine) +static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf; + struct drm_i915_private *dev_priv = engine->i915; + struct intel_ring *ring; int ret; WARN_ON(engine->buffer); - engine->i915 = dev_priv; - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->execlist_queue); - INIT_LIST_HEAD(&engine->buffers); - i915_gem_batch_pool_init(dev, &engine->batch_pool); + intel_engine_setup_common(engine); + memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); - ret = intel_engine_init_breadcrumbs(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2215,12 +2160,12 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto error; - ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error; } - engine->buffer = ringbuf; + engine->buffer = ring; if (I915_NEED_GFX_HWS(dev_priv)) { ret = init_status_page(engine); @@ -2233,26 +2178,22 @@ static int intel_init_ring_buffer(struct drm_device *dev, goto error; } - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf); + ret = intel_ring_pin(ring); if (ret) { DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", engine->name, ret); - intel_destroy_ringbuffer_obj(ringbuf); + intel_destroy_ringbuffer_obj(ring); goto error; } - ret = i915_cmd_parser_init_ring(engine); - if (ret) - goto error; - return 0; error: - intel_cleanup_engine(engine); + intel_engine_cleanup(engine); return ret; } -void intel_cleanup_engine(struct intel_engine_cs *engine) +void intel_engine_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv; @@ -2262,11 +2203,10 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_stop_engine(engine); WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_unpin_ringbuffer_obj(engine->buffer); - intel_ringbuffer_free(engine->buffer); + intel_ring_unpin(engine->buffer); + intel_ring_free(engine->buffer); engine->buffer = NULL; } @@ -2280,33 +2220,13 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) cleanup_phys_status_page(engine); } - i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); intel_ring_context_unpin(dev_priv->kernel_context, engine); engine->i915 = NULL; } -int intel_engine_idle(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *req; - - /* Wait upon the last request to be completed */ - if (list_empty(&engine->request_list)) - return 0; - - req = list_entry(engine->request_list.prev, - struct drm_i915_gem_request, - list); - - /* Make sure we do not trigger any retires */ - return __i915_wait_request(req, - req->i915->mm.interruptible, - NULL, NULL); -} - int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) { int ret; @@ -2317,7 +2237,7 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - request->ringbuf = request->engine->buffer; + request->ring = request->engine->buffer; ret = intel_ring_begin(request, 0); if (ret) @@ -2329,12 +2249,12 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { - struct intel_ringbuffer *ringbuf = req->ringbuf; - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; + int ret; - intel_ring_update_space(ringbuf); - if (ringbuf->space >= bytes) + intel_ring_update_space(ring); + if (ring->space >= bytes) return 0; /* @@ -2348,35 +2268,38 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) */ GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &engine->request_list, list) { + list_for_each_entry(target, &ring->request_list, ring_link) { unsigned space; - /* - * The request queue is per-engine, so can contain requests - * from multiple ringbuffers. Here, we must ignore any that - * aren't from the ringbuffer we're considering. - */ - if (target->ringbuf != ringbuf) - continue; - /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ringbuf->tail, - ringbuf->size); + space = __intel_ring_space(target->postfix, ring->tail, + ring->size); if (space >= bytes) break; } - if (WARN_ON(&target->list == &engine->request_list)) + if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - return i915_wait_request(target); + ret = i915_wait_request(target, true, NULL, NO_WAITBOOST); + if (ret) + return ret; + + if (i915_reset_in_progress(&target->i915->gpu_error)) + return -EAGAIN; + + i915_gem_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; } int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { - struct intel_ringbuffer *ringbuf = req->ringbuf; - int remain_actual = ringbuf->size - ringbuf->tail; - int remain_usable = ringbuf->effective_size - ringbuf->tail; + struct intel_ring *ring = req->ring; + int remain_actual = ring->size - ring->tail; + int remain_usable = ring->effective_size - ring->tail; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -2403,37 +2326,33 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) wait_bytes = total_bytes; } - if (wait_bytes > ringbuf->space) { + if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) return ret; - - intel_ring_update_space(ringbuf); - if (unlikely(ringbuf->space < wait_bytes)) - return -EAGAIN; } if (unlikely(need_wrap)) { - GEM_BUG_ON(remain_actual > ringbuf->space); - GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); + GEM_BUG_ON(remain_actual > ring->space); + GEM_BUG_ON(ring->tail + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ringbuf->virtual_start + ringbuf->tail, - 0, remain_actual); - ringbuf->tail = 0; - ringbuf->space -= remain_actual; + memset(ring->vaddr + ring->tail, 0, remain_actual); + ring->tail = 0; + ring->space -= remain_actual; } - ringbuf->space -= bytes; - GEM_BUG_ON(ringbuf->space < 0); + ring->space -= bytes; + GEM_BUG_ON(ring->space < 0); return 0; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + struct intel_ring *ring = req->ring; + int num_dwords = + (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; if (num_dwords == 0) @@ -2445,14 +2364,14 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return ret; while (num_dwords--) - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; @@ -2496,10 +2415,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) rcu_read_unlock(); } -static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, - u32 value) +static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *dev_priv = request->i915; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -2523,8 +2441,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); - POSTING_READ_FW(RING_TAIL(engine->mmio_base)); + i9xx_submit_request(request); /* Let the ring send IDLE messages to the GT again, * and so let it sleep to conserve power when idle. @@ -2535,10 +2452,9 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2563,30 +2479,29 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." */ - if (invalidate & I915_GEM_GPU_DOMAINS) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } static int -gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen8_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - bool ppgtt = USES_PPGTT(engine->dev) && + struct intel_ring *ring = req->ring; + bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -2595,71 +2510,70 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(engine, lower_32_bits(offset)); - intel_ring_emit(engine, upper_32_bits(offset)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int -hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +hsw_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } static int -gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen6_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_engine_cs *engine = req->engine; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2684,19 +2598,19 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." */ - if (invalidate & I915_GEM_DOMAIN_RENDER) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } @@ -2707,7 +2621,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct drm_i915_gem_object *obj; int ret, i; - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { @@ -2717,9 +2631,9 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, i915.semaphores = 0; } else { i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (ret != 0) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); i915.semaphores = 0; } else { @@ -2728,13 +2642,13 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } } - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; if (INTEL_GEN(dev_priv) >= 8) { u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); - engine->semaphore.sync_to = gen8_ring_sync; + engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -2748,7 +2662,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, engine->semaphore.signal_ggtt[i] = ring_offset; } } else if (INTEL_GEN(dev_priv) >= 6) { - engine->semaphore.sync_to = gen6_ring_sync; + engine->semaphore.sync_to = gen6_ring_sync_to; engine->semaphore.signal = gen6_signal; /* @@ -2804,6 +2718,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; + if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable = gen8_irq_enable; engine->irq_disable = gen8_irq_disable; @@ -2828,74 +2744,66 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + intel_ring_init_irq(dev_priv, engine); + intel_ring_init_semaphores(dev_priv, engine); + engine->init_hw = init_ring_common; - engine->write_tail = ring_write_tail; - engine->add_request = i9xx_add_request; - if (INTEL_GEN(dev_priv) >= 6) - engine->add_request = gen6_add_request; + engine->emit_request = i9xx_emit_request; + if (i915.semaphores) + engine->emit_request = gen6_sema_emit_request; + engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->emit_bb_start = gen8_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 6) - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) - engine->dispatch_execbuffer = i965_dispatch_execbuffer; + engine->emit_bb_start = i965_emit_bb_start; else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; + engine->emit_bb_start = i830_emit_bb_start; else - engine->dispatch_execbuffer = i915_dispatch_execbuffer; - - intel_ring_init_irq(dev_priv, engine); - intel_ring_init_semaphores(dev_priv, engine); + engine->emit_bb_start = i915_emit_bb_start; } -int intel_init_render_ring_buffer(struct drm_device *dev) +int intel_init_render_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + struct drm_i915_private *dev_priv = engine->i915; int ret; - engine->name = "render ring"; - engine->id = RCS; - engine->exec_id = I915_EXEC_RENDER; - engine->hw_id = 0; - engine->mmio_base = RENDER_RING_BASE; - intel_ring_default_vfuncs(dev_priv, engine); - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; - engine->add_request = gen8_render_add_request; - engine->flush = gen8_render_ring_flush; - if (i915_semaphore_is_enabled(dev_priv)) + engine->emit_request = gen8_render_emit_request; + engine->emit_flush = gen8_render_ring_flush; + if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; - engine->flush = gen7_render_ring_flush; + engine->emit_flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) - engine->flush = gen6_render_ring_flush; + engine->emit_flush = gen6_render_ring_flush; } else if (IS_GEN5(dev_priv)) { - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; } else { if (INTEL_GEN(dev_priv) < 4) - engine->flush = gen2_render_ring_flush; + engine->emit_flush = gen2_render_ring_flush; else - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; engine->irq_enable_mask = I915_USER_INTERRUPT; } if (IS_HASWELL(dev_priv)) - engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; + engine->emit_bb_start = hsw_emit_bb_start; engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - ret = intel_init_ring_buffer(dev, engine); + ret = intel_init_ring_buffer(engine); if (ret) return ret; @@ -2912,166 +2820,71 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return 0; } -int intel_init_bsd_ring_buffer(struct drm_device *dev) +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS]; - - engine->name = "bsd ring"; - engine->id = VCS; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 1; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); if (INTEL_GEN(dev_priv) >= 6) { - engine->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) - engine->write_tail = gen6_bsd_ring_write_tail; - engine->flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - else + engine->submit_request = gen6_bsd_submit_request; + engine->emit_flush = gen6_bsd_ring_flush; + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; - engine->flush = bsd_ring_flush; + engine->emit_flush = bsd_ring_flush; if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } /** * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) */ -int intel_init_bsd2_ring_buffer(struct drm_device *dev) +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS2]; - - engine->name = "bsd2 ring"; - engine->id = VCS2; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 4; - engine->mmio_base = GEN8_BSD2_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_bsd_ring_flush; - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + engine->emit_flush = gen6_bsd_ring_flush; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } -int intel_init_blt_ring_buffer(struct drm_device *dev) +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[BCS]; - - engine->name = "blitter ring"; - engine->id = BCS; - engine->exec_id = I915_EXEC_BLT; - engine->hw_id = 2; - engine->mmio_base = BLT_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - else + engine->emit_flush = gen6_ring_flush; + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(engine); } -int intel_init_vebox_ring_buffer(struct drm_device *dev) +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VECS]; - - engine->name = "video enhancement ring"; - engine->id = VECS; - engine->exec_id = I915_EXEC_VEBOX; - engine->hw_id = 3; - engine->mmio_base = VEBOX_RING_BASE; + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; + engine->emit_flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - } else { + if (INTEL_GEN(dev_priv) < 8) { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; } - return intel_init_ring_buffer(dev, engine); -} - -int -intel_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS); - - engine->gpu_caches_dirty = false; - return 0; -} - -int -intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - - engine->gpu_caches_dirty = false; - return 0; -} - -void -intel_stop_engine(struct intel_engine_cs *engine) -{ - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - stop_ring(engine); + return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 12cb7ed90014..43e545e44352 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,6 +3,7 @@ #include <linux/hashtable.h> #include "i915_gem_batch_pool.h" +#include "i915_gem_request.h" #define I915_CMD_HASH_ORDER 9 @@ -31,23 +32,23 @@ struct intel_hw_status_page { struct drm_i915_gem_object *obj; }; -#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) -#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) +#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) +#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) -#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base)) -#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) +#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base)) +#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val) -#define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base)) -#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) +#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base)) +#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val) -#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base)) -#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) +#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base)) +#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val) -#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) -#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) +#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base)) +#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val) -#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) -#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val) +#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base)) +#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val) /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. @@ -62,7 +63,7 @@ struct intel_hw_status_page { (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) -enum intel_ring_hangcheck_action { +enum intel_engine_hangcheck_action { HANGCHECK_IDLE = 0, HANGCHECK_WAIT, HANGCHECK_ACTIVE, @@ -72,24 +73,26 @@ enum intel_ring_hangcheck_action { #define HANGCHECK_SCORE_RING_HUNG 31 -struct intel_ring_hangcheck { +struct intel_engine_hangcheck { u64 acthd; unsigned long user_interrupts; u32 seqno; int score; - enum intel_ring_hangcheck_action action; + enum intel_engine_hangcheck_action action; int deadlock; u32 instdone[I915_NUM_INSTDONE_REG]; }; -struct intel_ringbuffer { +struct intel_ring { struct drm_i915_gem_object *obj; - void __iomem *virtual_start; + void *vaddr; struct i915_vma *vma; struct intel_engine_cs *engine; struct list_head link; + struct list_head request_list; + u32 head; u32 tail; int space; @@ -146,8 +149,10 @@ struct intel_engine_cs { unsigned int exec_id; unsigned int hw_id; unsigned int guc_id; /* XXX same as hw_id? */ + u64 fence_context; u32 mmio_base; - struct intel_ringbuffer *buffer; + unsigned int irq_shift; + struct intel_ring *buffer; struct list_head buffers; /* Rather than have every client wait upon all user interrupts, @@ -195,33 +200,34 @@ struct intel_engine_cs { u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *ring); - void (*irq_disable)(struct intel_engine_cs *ring); + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *ring); + int (*init_hw)(struct intel_engine_cs *engine); int (*init_context)(struct drm_i915_gem_request *req); - void (*write_tail)(struct intel_engine_cs *ring, - u32 value); - int __must_check (*flush)(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains); - int (*add_request)(struct drm_i915_gem_request *req); + int (*emit_flush)(struct drm_i915_gem_request *request, + u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) +#define I915_DISPATCH_RS BIT(2) + int (*emit_request)(struct drm_i915_gem_request *req); + void (*submit_request)(struct drm_i915_gem_request *req); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last * seen value is good enough. Note that the seqno will always be * monotonic, even if not coherent. */ - void (*irq_seqno_barrier)(struct intel_engine_cs *ring); - int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags); -#define I915_DISPATCH_SECURE 0x1 -#define I915_DISPATCH_PINNED 0x2 -#define I915_DISPATCH_RS 0x4 - void (*cleanup)(struct intel_engine_cs *ring); + void (*irq_seqno_barrier)(struct intel_engine_cs *engine); + void (*cleanup)(struct intel_engine_cs *engine); /* GEN8 signal/wait table - never trust comments! * signal to signal to signal to signal to signal to @@ -274,12 +280,9 @@ struct intel_engine_cs { }; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - u32 seqno); - int (*signal)(struct drm_i915_gem_request *signaller_req, - /* num_dwords needed by caller */ - unsigned int num_dwords); + int (*sync_to)(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal); + int (*signal)(struct drm_i915_gem_request *req); } semaphore; /* Execlists */ @@ -291,24 +294,6 @@ struct intel_engine_cs { unsigned int idle_lite_restore_wa; bool disable_lite_restore_wa; u32 ctx_desc_template; - int (*emit_request)(struct drm_i915_gem_request *request); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains); - int (*emit_bb_start)(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags); - - /** - * List of objects currently involved in rendering from the - * ringbuffer. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. - */ - struct list_head active_list; /** * List of breadcrumbs associated with GPU requests currently @@ -323,11 +308,16 @@ struct intel_engine_cs { */ u32 last_submitted_seqno; - bool gpu_caches_dirty; + /* An RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_gem_active last_request; struct i915_gem_context *last_context; - struct intel_ring_hangcheck hangcheck; + struct intel_engine_hangcheck hangcheck; struct { struct drm_i915_gem_object *obj; @@ -338,7 +328,7 @@ struct intel_engine_cs { /* * Table of commands the command parser needs to know about - * for this ring. + * for this engine. */ DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); @@ -352,11 +342,11 @@ struct intel_engine_cs { * Returns the bitmask for the length field of the specified command. * Return 0 for an unrecognized/invalid command. * - * If the command parser finds an entry for a command in the ring's + * If the command parser finds an entry for a command in the engine's * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-ring length field - * encoding for the command (i.e. certain opcode ranges use certain bits - * to encode the command length in the header). + * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); }; @@ -374,8 +364,8 @@ intel_engine_flag(const struct intel_engine_cs *engine) } static inline u32 -intel_ring_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) +intel_engine_sync_index(struct intel_engine_cs *engine, + struct intel_engine_cs *other) { int idx; @@ -437,67 +427,83 @@ intel_write_status_page(struct intel_engine_cs *engine, #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size); -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf); -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf); -void intel_ringbuffer_free(struct intel_ringbuffer *ring); +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_free(struct intel_ring *ring); -void intel_stop_engine(struct intel_engine_cs *engine); -void intel_cleanup_engine(struct intel_engine_cs *engine); +void intel_engine_stop(struct intel_engine_cs *engine); +void intel_engine_cleanup(struct intel_engine_cs *engine); int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_engine_cs *engine, - u32 data) + +static inline void intel_ring_emit(struct intel_ring *ring, u32 data) { - struct intel_ringbuffer *ringbuf = engine->buffer; - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + *(uint32_t *)(ring->vaddr + ring->tail) = data; + ring->tail += 4; } -static inline void intel_ring_emit_reg(struct intel_engine_cs *engine, - i915_reg_t reg) + +static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) { - intel_ring_emit(engine, i915_mmio_reg_offset(reg)); + intel_ring_emit(ring, i915_mmio_reg_offset(reg)); +} + +static inline void intel_ring_advance(struct intel_ring *ring) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ } -static inline void intel_ring_advance(struct intel_engine_cs *engine) + +static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) { - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + return value & (ring->size - 1); } + int __intel_ring_space(int head, int tail, int size); -void intel_ring_update_space(struct intel_ringbuffer *ringbuf); +void intel_ring_update_space(struct intel_ring *ring); -int __must_check intel_engine_idle(struct intel_engine_cs *engine); -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); -int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); -int intel_init_render_ring_buffer(struct drm_device *dev); -int intel_init_bsd_ring_buffer(struct drm_device *dev); -int intel_init_bsd2_ring_buffer(struct drm_device *dev); -int intel_init_blt_ring_buffer(struct drm_device *dev); -int intel_init_vebox_ring_buffer(struct drm_device *dev); +void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_init_common(struct intel_engine_cs *engine); +void intel_engine_cleanup_common(struct intel_engine_cs *engine); -u64 intel_ring_get_active_head(struct intel_engine_cs *engine); -static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) +static inline int intel_engine_idle(struct intel_engine_cs *engine, + bool interruptible) { - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); + /* Wait upon the last request to be completed */ + return i915_gem_active_wait_unlocked(&engine->last_request, + interruptible, NULL, NULL); } -int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_init_render_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); -static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) +u64 intel_engine_get_active_head(struct intel_engine_cs *engine); +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) { - return ringbuf->tail; + return intel_read_status_page(engine, I915_GEM_HWS_INDEX); } +int init_workarounds_ring(struct intel_engine_cs *engine); + /* * Arbitrary size for largest possible 'add request' sequence. The code paths * are complex and variable. Empirical measurement shows that the worst case @@ -513,17 +519,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - u32 seqno; -}; - -struct intel_signal_node { - struct rb_node node; - struct intel_wait wait; -}; - int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) @@ -570,4 +565,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); +static inline bool intel_engine_is_active(struct intel_engine_cs *engine) +{ + return i915_gem_active_isset(&engine->last_request); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 1d9736b0cced..cbdca7e4d307 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -36,6 +36,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_plane_helper.h> #include "intel_drv.h" +#include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" @@ -430,7 +431,7 @@ vlv_update_plane(struct drm_plane *dplane, */ sprctl |= SP_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SP_TILED; /* Sizes are 0 based */ @@ -467,7 +468,7 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); else I915_WRITE(SPLINOFF(pipe, plane), linear_offset); @@ -552,7 +553,7 @@ ivb_update_plane(struct drm_plane *plane, */ sprctl |= SPRITE_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SPRITE_TILED; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) @@ -606,7 +607,7 @@ ivb_update_plane(struct drm_plane *plane, * register */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); - else if (obj->tiling_mode != I915_TILING_NONE) + else if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); else I915_WRITE(SPRLINOFF(pipe), linear_offset); @@ -693,7 +694,7 @@ ilk_update_plane(struct drm_plane *plane, */ dvscntr |= DVS_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dvscntr |= DVS_TILED; if (IS_GEN6(dev)) @@ -736,7 +737,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); else I915_WRITE(DVSLINOFF(pipe), linear_offset); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ff80a81b1a84..43f833901b8e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv) i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev_priv); + intel_sanitize_gt_powersave(dev_priv); } static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, @@ -1618,8 +1618,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, i.e. it waits until - * (I915_READ_FW(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ_FW(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is @@ -1652,8 +1654,10 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, i.e. it waits until - * (I915_READ(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3886fd..452629de7a57 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use @@ -698,15 +722,20 @@ struct drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) __u64 flags; - __u64 rsvd1; + union { + __u64 rsvd1; + __u64 pad_to_size; + }; __u64 rsvd2; }; @@ -897,6 +926,7 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 #define I915_BIT_6_SWIZZLE_9 1 |