author    Chris Wilson <chris@chris-wilson.co.uk>    2019-04-24 21:07:17 +0100
committer Chris Wilson <chris@chris-wilson.co.uk>    2019-04-24 22:26:49 +0100
commit    79ffac8599c4d8aa84d313920d3d86d7361c252b (patch)
tree      3352bc82c4d7bb7cd9caaf42fda9ee97e0e88bde /drivers/gpu/drm/i915/i915_gem_pm.c
parent    2ccdf6a1c3f7ff51d721ee7a5bed96e03da77205 (diff)
drm/i915: Invert the GEM wakeref hierarchy
In the current scheme, on submitting a request we take a single global
GEM wakeref, which trickles down to wake up all GT power domains. This
is undesirable as we would like to be able to localise our power
management to the available power domains and to remove the global GEM
operations from the heart of the driver. (The intent there is to push
global GEM decisions to the boundary as used by the GEM user
interface.)

Now during request construction, each request is responsible via its
logical context to acquire a wakeref on each power domain it intends
to utilize. Currently, each request takes a wakeref on the engine(s)
and the engines themselves take a chipset wakeref. This gives us a
transition on each engine which we can extend if we want to insert
more power management control (such as soft rc6). The global GEM
operations that currently require a struct_mutex are reduced to
listening to pm events from the chipset GT wakeref. As we reduce the
struct_mutex requirement, these listeners should evaporate.

Perhaps the biggest immediate change is that this removes the
struct_mutex requirement around GT power management, allowing us
greater flexibility in request construction. Another important
knock-on effect is that by tracking engine usage, we can insert a
switch back to the kernel context on that engine immediately, avoiding
any extra delay or inserting global synchronisation barriers. This
makes tracking when an engine and its associated contexts are idle
much easier -- important for when we forgo our assumed execution
ordering and need idle barriers to unpin used contexts. In the
process, it means we remove a large chunk of code whose only purpose
was to switch back to the kernel context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190424200717.1686-5-chris@chris-wilson.co.uk
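To make the inversion concrete, below is a minimal C sketch of the
acquisition order the message describes: a request pins a wakeref on
its engine, and the engine's first wakeref in turn pins the chipset
(GT) wakeref. All types and function names here are simplified,
illustrative stand-ins, not the driver's actual intel_wakeref API.

struct wakeref { unsigned int count; };

struct gt_domain { struct wakeref wf; };

struct engine_domain {
	struct gt_domain *gt;
	struct wakeref wf;
};

struct sketch_request { struct engine_domain *engine; };

static void gt_wakeref_get(struct gt_domain *gt)
{
	if (gt->wf.count++ == 0)
		; /* 0 -> 1 transition: power up the chipset (rc6, rps, ...) */
}

static void engine_wakeref_get(struct engine_domain *engine)
{
	/* a 0 -> 1 transition on the engine trickles up to the GT */
	if (engine->wf.count++ == 0)
		gt_wakeref_get(engine->gt);
}

static void request_construct(struct sketch_request *rq)
{
	/* each request wakes only the power domains it intends to use */
	engine_wakeref_get(rq->engine);
}

The point of the per-engine transition is visible in
engine_wakeref_get(): it is a natural place to hang additional power
management (such as the soft rc6 mentioned above) without any global
lock.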
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_pm.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_pm.c  264
1 file changed, 86 insertions(+), 178 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 9fb0e8d567a2..3554d55dae35 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -4,136 +4,63 @@
* Copyright © 2019 Intel Corporation
*/
+#include "gt/intel_gt_pm.h"
+
#include "i915_drv.h"
#include "i915_gem_pm.h"
#include "i915_globals.h"
-#include "intel_pm.h"
-static void __i915_gem_park(struct drm_i915_private *i915)
+static void i915_gem_park(struct drm_i915_private *i915)
{
- intel_wakeref_t wakeref;
-
- GEM_TRACE("\n");
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
lockdep_assert_held(&i915->drm.struct_mutex);
- GEM_BUG_ON(i915->gt.active_requests);
- GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
-
- if (!i915->gt.awake)
- return;
-
- /*
- * Be paranoid and flush a concurrent interrupt to make sure
- * we don't reactivate any irq tasklets after parking.
- *
- * FIXME: Note that even though we have waited for execlists to be idle,
- * there may still be an in-flight interrupt even though the CSB
- * is now empty. synchronize_irq() makes sure that a residual interrupt
- * is completed before we continue, but it doesn't prevent the HW from
- * raising a spurious interrupt later. To complete the shield we should
- * coordinate disabling the CS irq with flushing the interrupts.
- */
- synchronize_irq(i915->drm.irq);
-
- intel_engines_park(i915);
- i915_timelines_park(i915);
-
- i915_pmu_gt_parked(i915);
- i915_vma_parked(i915);
-
- wakeref = fetch_and_zero(&i915->gt.awake);
- GEM_BUG_ON(!wakeref);
-
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_idle(i915);
-
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
-
- i915_globals_park();
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
- unsigned long mask)
-{
- bool result = true;
-
- /*
- * Even if we fail to switch, give whatever is running a small chance
- * to save itself before we report the failure. Yes, this may be a
- * false positive due to e.g. ENOMEM, caveat emptor!
- */
- if (i915_gem_switch_to_kernel_context(i915, mask))
- result = false;
- if (i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
- I915_GEM_IDLE_TIMEOUT))
- result = false;
+ for_each_engine(engine, i915, id) {
+ /*
+ * We are committed now to parking the engines, make sure there
+ * will be no more interrupts arriving later and the engines
+ * are truly idle.
+ */
+ if (wait_for(intel_engine_is_idle(engine), 10)) {
+ struct drm_printer p = drm_debug_printer(__func__);
- if (!result) {
- if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
dev_err(i915->drm.dev,
- "Failed to idle engines, declaring wedged!\n");
- GEM_TRACE_DUMP();
+ "%s is not idle before parking\n",
+ engine->name);
+ intel_engine_dump(engine, &p, NULL);
}
+ tasklet_kill(&engine->execlists.tasklet);
- /* Forcibly cancel outstanding work and leave the gpu quiet. */
- i915_gem_set_wedged(i915);
+ i915_gem_batch_pool_fini(&engine->batch_pool);
}
- i915_retire_requests(i915); /* ensure we flush after wedging */
- return result;
+ i915_timelines_park(i915);
+ i915_vma_parked(i915);
+
+ i915_globals_park();
}
static void idle_work_handler(struct work_struct *work)
{
struct drm_i915_private *i915 =
container_of(work, typeof(*i915), gem.idle_work.work);
- bool rearm_hangcheck;
-
- if (!READ_ONCE(i915->gt.awake))
- return;
-
- if (READ_ONCE(i915->gt.active_requests))
- return;
-
- rearm_hangcheck =
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
if (!mutex_trylock(&i915->drm.struct_mutex)) {
/* Currently busy, come back later */
mod_delayed_work(i915->wq,
&i915->gem.idle_work,
msecs_to_jiffies(50));
- goto out_rearm;
+ return;
}
- /*
- * Flush out the last user context, leaving only the pinned
- * kernel context resident. Should anything unfortunate happen
- * while we are idle (such as the GPU being power cycled), no users
- * will be harmed.
- */
- if (!work_pending(&i915->gem.idle_work.work) &&
- !i915->gt.active_requests) {
- ++i915->gt.active_requests; /* don't requeue idle */
-
- switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
- if (!--i915->gt.active_requests) {
- __i915_gem_park(i915);
- rearm_hangcheck = false;
- }
- }
+ intel_wakeref_lock(&i915->gt.wakeref);
+ if (!intel_wakeref_active(&i915->gt.wakeref))
+ i915_gem_park(i915);
+ intel_wakeref_unlock(&i915->gt.wakeref);
mutex_unlock(&i915->drm.struct_mutex);
-
-out_rearm:
- if (rearm_hangcheck) {
- GEM_BUG_ON(!i915->gt.awake);
- i915_queue_hangcheck(i915);
- }
}
static void retire_work_handler(struct work_struct *work)
@@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work)
mutex_unlock(&i915->drm.struct_mutex);
}
- /*
- * Keep the retire handler running until we are finally idle.
- * We do not need to do this test under locking as in the worst-case
- * we queue the retire worker once too often.
- */
- if (READ_ONCE(i915->gt.awake))
+ if (intel_wakeref_active(&i915->gt.wakeref))
queue_delayed_work(i915->wq,
&i915->gem.retire_work,
round_jiffies_up_relative(HZ));
}
-void i915_gem_park(struct drm_i915_private *i915)
+static int pm_notifier(struct notifier_block *nb,
+ unsigned long action,
+ void *data)
{
- GEM_TRACE("\n");
+ struct drm_i915_private *i915 =
+ container_of(nb, typeof(*i915), gem.pm_notifier);
- lockdep_assert_held(&i915->drm.struct_mutex);
- GEM_BUG_ON(i915->gt.active_requests);
+ switch (action) {
+ case INTEL_GT_UNPARK:
+ i915_globals_unpark();
+ queue_delayed_work(i915->wq,
+ &i915->gem.retire_work,
+ round_jiffies_up_relative(HZ));
+ break;
- if (!i915->gt.awake)
- return;
+ case INTEL_GT_PARK:
+ mod_delayed_work(i915->wq,
+ &i915->gem.idle_work,
+ msecs_to_jiffies(100));
+ break;
+ }
- /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
- mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100));
+ return NOTIFY_OK;
}
-void i915_gem_unpark(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
{
- GEM_TRACE("\n");
-
- lockdep_assert_held(&i915->drm.struct_mutex);
- GEM_BUG_ON(!i915->gt.active_requests);
- assert_rpm_wakelock_held(i915);
-
- if (i915->gt.awake)
- return;
-
- /*
- * It seems that the DMC likes to transition between the DC states a lot
- * when there are no connected displays (no active power domains) during
- * command submission.
- *
- * This activity has negative impact on the performance of the chip with
- * huge latencies observed in the interrupt handler and elsewhere.
- *
- * Work around it by grabbing a GT IRQ power domain whilst there is any
- * GT activity, preventing any DC state transitions.
- */
- i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
- GEM_BUG_ON(!i915->gt.awake);
-
- i915_globals_unpark();
-
- intel_enable_gt_powersave(i915);
- i915_update_gfx_val(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_busy(i915);
- i915_pmu_gt_unparked(i915);
-
- intel_engines_unpark(i915);
+ bool result = true;
- i915_queue_hangcheck(i915);
+ do {
+ if (i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_BOOST,
+ I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ /* XXX hide warning from gem_eio */
+ if (i915_modparams.reset) {
+ dev_err(i915->drm.dev,
+ "Failed to idle engines, declaring wedged!\n");
+ GEM_TRACE_DUMP();
+ }
+
+ /*
+ * Forcibly cancel outstanding work and leave
+ * the gpu quiet.
+ */
+ i915_gem_set_wedged(i915);
+ result = false;
+ }
+ } while (i915_retire_requests(i915) && result);
- queue_delayed_work(i915->wq,
- &i915->gem.retire_work,
- round_jiffies_up_relative(HZ));
+ GEM_BUG_ON(i915->gt.awake);
+ return result;
}
bool i915_gem_load_power_context(struct drm_i915_private *i915)
{
- /* Force loading the kernel context on all engines */
- if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
- return false;
-
- /*
- * Immediately park the GPU so that we enable powersaving and
- * treat it as idle. The next time we issue a request, we will
- * unpark and start using the engine->pinned_default_state, otherwise
- * it is in limbo and an early reset may fail.
- */
- __i915_gem_park(i915);
-
- return true;
+ return switch_to_kernel_context_sync(i915);
}
void i915_gem_suspend(struct drm_i915_private *i915)
{
- intel_wakeref_t wakeref;
-
GEM_TRACE("\n");
- wakeref = intel_runtime_pm_get(i915);
+ flush_workqueue(i915->wq);
mutex_lock(&i915->drm.struct_mutex);
@@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+ switch_to_kernel_context_sync(i915);
mutex_unlock(&i915->drm.struct_mutex);
- i915_reset_flush(i915);
+
+ /*
+ * Assert that we successfully flushed all the work and
+ * reset the GPU back to its idle, low power state.
+ */
+ GEM_BUG_ON(i915->gt.awake);
+ cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
drain_delayed_work(&i915->gem.retire_work);
@@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
*/
drain_delayed_work(&i915->gem.idle_work);
- flush_workqueue(i915->wq);
-
- /*
- * Assert that we successfully flushed all the work and
- * reset the GPU back to its idle, low power state.
- */
- GEM_BUG_ON(i915->gt.awake);
+ i915_gem_drain_freed_objects(i915);
intel_uc_suspend(i915);
-
- intel_runtime_pm_put(i915, wakeref);
}
void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915)
{
INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+
+ i915->gem.pm_notifier.notifier_call = pm_notifier;
+ blocking_notifier_chain_register(&i915->gt.pm_notifications,
+ &i915->gem.pm_notifier);
}
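For reference, the park/unpark plumbing added above follows the
kernel's standard blocking notifier pattern. The following
self-contained sketch shows the same shape under assumed names: the
event values and chain head are made up for illustration, while
<linux/notifier.h> and the notifier calls themselves are the real
kernel API. The GT wakeref transition broadcasts an event, and a GEM
listener reacts by queuing its retire or idle work.

#include <linux/notifier.h>

#define SKETCH_GT_UNPARK 1
#define SKETCH_GT_PARK   2

static BLOCKING_NOTIFIER_HEAD(sketch_pm_notifications);

static int sketch_pm_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	switch (action) {
	case SKETCH_GT_UNPARK:
		/* e.g. queue delayed work for background request retiring */
		break;
	case SKETCH_GT_PARK:
		/* e.g. schedule the idle worker to flush and park GEM */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block sketch_nb = {
	.notifier_call = sketch_pm_notifier,
};

static void sketch_register(void)
{
	blocking_notifier_chain_register(&sketch_pm_notifications,
					 &sketch_nb);
}

static void sketch_gt_unpark(void)
{
	/* the 0 -> 1 wakeref transition broadcasts to all listeners */
	blocking_notifier_call_chain(&sketch_pm_notifications,
				     SKETCH_GT_UNPARK, NULL);
}

This decoupling is what lets the GEM-level listeners "evaporate" later:
once nothing needs struct_mutex on park/unpark, the notifier callback
can simply be unregistered without touching the GT wakeref code.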