Diffstat (limited to 'drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 313
1 file changed, 212 insertions(+), 101 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index e598a4363d01..cca9cf536f76 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -7,18 +7,60 @@
#include "abi/guc_actions_abi.h"
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
+#include "xe_gt_stats.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
#include "xe_trace.h"
+#include "regs/xe_guc_regs.h"
-#define TLB_TIMEOUT (HZ / 4)
+#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
+
+/*
+ * TLB inval depends on pending commands in the CT queue and then the real
+ * invalidation time. Double up the time to process the full CT queue
+ * just to be on the safe side.
+ */
+static long tlb_timeout_jiffies(struct xe_gt *gt)
+{
+ /* this reflects what HW/GuC needs to process a TLB inv request */
+ const long hw_tlb_timeout = HZ / 4;
+
+ /* this estimates actual delay caused by the CTB transport */
+ long delay = xe_guc_ct_queue_proc_time_jiffies(&gt->uc.guc.ct);
+
+ return hw_tlb_timeout + 2 * delay;
+}
+
+static void
+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+ trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
+ xe_gt_tlb_invalidation_fence_fini(fence);
+ dma_fence_signal(&fence->base);
+ if (!stack)
+ dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ list_del(&fence->link);
+ __invalidation_fence_signal(xe, fence);
+}
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
struct xe_gt *gt = container_of(work, struct xe_gt,
tlb_invalidation.fence_tdr.work);
+ struct xe_device *xe = gt_to_xe(gt);
struct xe_gt_tlb_invalidation_fence *fence, *next;
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
@@ -27,22 +69,20 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
s64 since_inval_ms = ktime_ms_delta(ktime_get(),
fence->invalidation_time);
- if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
+ if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt))
break;
- trace_xe_gt_tlb_invalidation_fence_timeout(fence);
+ trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence);
xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
fence->seqno, gt->tlb_invalidation.seqno_recv);
- list_del(&fence->link);
fence->base.error = -ETIME;
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
+ invalidation_fence_signal(xe, fence);
}
if (!list_empty(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
- TLB_TIMEOUT);
+ tlb_timeout_jiffies(gt));
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
}
@@ -67,21 +107,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
return 0;
}
-static void
-__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
-{
- trace_xe_gt_tlb_invalidation_fence_signal(fence);
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
-{
- list_del(&fence->link);
- __invalidation_fence_signal(fence);
-}
-
/**
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
* @gt: graphics tile
@@ -91,7 +116,6 @@ invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{
struct xe_gt_tlb_invalidation_fence *fence, *next;
- struct xe_guc *guc = &gt->uc.guc;
int pending_seqno;
/*
@@ -114,11 +138,10 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
else
pending_seqno = gt->tlb_invalidation.seqno - 1;
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link)
- invalidation_fence_signal(fence);
+ invalidation_fence_signal(gt_to_xe(gt), fence);
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
mutex_unlock(&gt->uc.guc.ct.lock);
}
@@ -141,9 +164,12 @@ static int send_tlb_invalidation(struct xe_guc *guc,
u32 *action, int len)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
int seqno;
int ret;
+ xe_gt_assert(gt, fence);
+
/*
* XXX: The seqno algorithm relies on TLB invalidation being processed
* in order which they currently are, if that changes the algorithm will
@@ -152,14 +178,12 @@ static int send_tlb_invalidation(struct xe_guc *guc,
mutex_lock(&guc->ct.lock);
seqno = gt->tlb_invalidation.seqno;
- if (fence) {
- fence->seqno = seqno;
- trace_xe_gt_tlb_invalidation_fence_send(fence);
- }
+ fence->seqno = seqno;
+ trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
action[1] = seqno;
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
G2H_LEN_DW_TLB_INVALIDATE, 1);
- if (!ret && fence) {
+ if (!ret) {
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
/*
* We haven't actually published the TLB fence as per
@@ -168,7 +192,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
* we can just go ahead and signal the fence here.
*/
if (tlb_invalidation_seqno_past(gt, seqno)) {
- __invalidation_fence_signal(fence);
+ __invalidation_fence_signal(xe, fence);
} else {
fence->invalidation_time = ktime_get();
list_add_tail(&fence->link,
@@ -177,20 +201,20 @@ static int send_tlb_invalidation(struct xe_guc *guc,
if (list_is_singular(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
- TLB_TIMEOUT);
+ tlb_timeout_jiffies(gt));
}
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
- } else if (ret < 0 && fence) {
- __invalidation_fence_signal(fence);
+ } else if (ret < 0) {
+ __invalidation_fence_signal(xe, fence);
}
if (!ret) {
gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
TLB_INVALIDATION_SEQNO_MAX;
if (!gt->tlb_invalidation.seqno)
gt->tlb_invalidation.seqno = 1;
- ret = seqno;
}
mutex_unlock(&guc->ct.lock);
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
return ret;
}
@@ -202,14 +226,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
/**
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
* @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
*
* Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ * caller can use the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: 0 on success, negative error code on error
*/
-int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence)
{
u32 action[] = {
XE_GUC_ACTION_TLB_INVALIDATION,
@@ -217,41 +243,87 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
};
- return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+ return send_tlb_invalidation(&gt->uc.guc, fence, action,
ARRAY_SIZE(action));
}
/**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
+ * synchronous.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
+ gt->uc.guc.submission_state.enabled) {
+ struct xe_gt_tlb_invalidation_fence fence;
+ int ret;
+
+ xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+ ret = xe_gt_tlb_invalidation_guc(gt, &fence);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence);
+ return ret;
+ }
+
+ xe_gt_tlb_invalidation_fence_wait(&fence);
+ } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+ PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+ PVC_GUC_TLB_INV_DESC0_VALID);
+ } else {
+ xe_mmio_write32(gt, GUC_TLB_INV_CR,
+ GUC_TLB_INV_CR_INVALIDATE);
+ }
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+
+ return 0;
+}
+
+/**
+ * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
+ * address range
+ *
* @gt: graphics tile
* @fence: invalidation fence which will be signaled on TLB invalidation
- * completion, can be NULL
- * @vma: VMA to invalidate
+ * completion
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
*
* Issue a range-based TLB invalidation if supported, if not fall back to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
*/
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma)
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ u64 start, u64 end, u32 asid)
{
struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN 7
u32 action[MAX_TLB_INVALIDATION_LEN];
int len = 0;
- xe_gt_assert(gt, vma);
+ xe_gt_assert(gt, fence);
/* Execlists not supported */
if (gt_to_xe(gt)->info.force_execlist) {
- if (fence)
- __invalidation_fence_signal(fence);
-
+ __invalidation_fence_signal(xe, fence);
return 0;
}
@@ -260,9 +332,9 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
if (!xe->info.has_range_tlb_invalidation) {
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
} else {
- u64 start = xe_vma_start(vma);
- u64 length = xe_vma_size(vma);
- u64 align, end;
+ u64 orig_start = start;
+ u64 length = end - start;
+ u64 align;
if (length < SZ_4K)
length = SZ_4K;
@@ -274,12 +346,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
* address mask covering the required range.
*/
align = roundup_pow_of_two(length);
- start = ALIGN_DOWN(xe_vma_start(vma), align);
- end = ALIGN(xe_vma_end(vma), align);
+ start = ALIGN_DOWN(start, align);
+ end = ALIGN(end, align);
length = align;
while (start + length < end) {
length <<= 1;
- start = ALIGN_DOWN(xe_vma_start(vma), length);
+ start = ALIGN_DOWN(orig_start, length);
}
/*
@@ -288,16 +360,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
*/
if (length >= SZ_2M) {
length = max_t(u64, SZ_16M, length);
- start = ALIGN_DOWN(xe_vma_start(vma), length);
+ start = ALIGN_DOWN(orig_start, length);
}
xe_gt_assert(gt, length >= SZ_4K);
xe_gt_assert(gt, is_power_of_2(length));
- xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+ xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+ ilog2(SZ_2M) + 1)));
xe_gt_assert(gt, IS_ALIGNED(start, length));
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
- action[len++] = xe_vma_vm(vma)->usm.asid;
+ action[len++] = asid;
action[len++] = lower_32_bits(start);
action[len++] = upper_32_bits(start);
action[len++] = ilog2(length) - ilog2(SZ_4K);
@@ -309,41 +382,27 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
}
/**
- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
* @gt: graphics tile
- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
*
- * Wait for 200ms for a TLB invalidation to complete, in practice we always
- * should receive the TLB invalidation within 200ms.
+ * Issue a range-based TLB invalidation if supported, if not fall back to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: 0 on success, -ETIME on TLB invalidation timeout
+ * Return: Negative error code on error, 0 on success
*/
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ struct xe_vma *vma)
{
- struct xe_guc *guc = &gt->uc.guc;
- int ret;
-
- /* Execlists not supported */
- if (gt_to_xe(gt)->info.force_execlist)
- return 0;
-
- /*
- * XXX: See above, this algorithm only works if seqno are always in
- * order
- */
- ret = wait_event_timeout(guc->ct.wq,
- tlb_invalidation_seqno_past(gt, seqno),
- TLB_TIMEOUT);
- if (!ret) {
- struct drm_printer p = xe_gt_err_printer(gt);
-
- xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
- seqno, gt->tlb_invalidation.seqno_recv);
- xe_guc_ct_print(&guc->ct, &p, true);
- return -ETIME;
- }
+ xe_gt_assert(gt, vma);
- return 0;
+ return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
+ xe_vma_end(vma),
+ xe_vma_vm(vma)->usm.asid);
}
/**
@@ -361,6 +420,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
struct xe_gt_tlb_invalidation_fence *fence, *next;
unsigned long flags;
@@ -388,27 +448,22 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
- /*
- * wake_up_all() and wait_event_timeout() already have the correct
- * barriers.
- */
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link) {
- trace_xe_gt_tlb_invalidation_fence_recv(fence);
+ trace_xe_gt_tlb_invalidation_fence_recv(xe, fence);
if (!tlb_invalidation_seqno_past(gt, fence->seqno))
break;
- invalidation_fence_signal(fence);
+ invalidation_fence_signal(xe, fence);
}
if (!list_empty(&gt->tlb_invalidation.pending_fences))
mod_delayed_work(system_wq,
&gt->tlb_invalidation.fence_tdr,
- TLB_TIMEOUT);
+ tlb_timeout_jiffies(gt));
else
cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
@@ -416,3 +471,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+ .get_driver_name = invalidation_fence_get_driver_name,
+ .get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+/**
+ * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
+ * @gt: GT
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
+ * must be called if fence is not signaled.
+ */
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ bool stack)
+{
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+ spin_lock_irq(&gt->tlb_invalidation.lock);
+ dma_fence_init(&fence->base, &invalidation_fence_ops,
+ &gt->tlb_invalidation.lock,
+ dma_fence_context_alloc(1), 1);
+ spin_unlock_irq(&gt->tlb_invalidation.lock);
+ INIT_LIST_HEAD(&fence->link);
+ if (stack)
+ set_bit(FENCE_STACK_BIT, &fence->base.flags);
+ else
+ dma_fence_get(&fence->base);
+ fence->gt = gt;
+}
+
+/**
+ * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
+ * @fence: TLB invalidation fence to finalize
+ *
+ * Drop PM ref which fence took during init.
+ */
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ xe_pm_runtime_put(gt_to_xe(fence->gt));
+}
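
Taken together, the hunks above replace the seqno + xe_gt_tlb_invalidation_wait() flow with explicit invalidation fences. Below is a minimal caller-side sketch of a synchronous range invalidation under the new API; it is not part of the patch, example_invalidate_range_sync is a hypothetical name, and the error handling and use of xe_gt_tlb_invalidation_fence_wait() mirror xe_gt_tlb_invalidation_ggtt() above.

static int example_invalidate_range_sync(struct xe_gt *gt, u64 start,
					 u64 end, u32 asid)
{
	struct xe_gt_tlb_invalidation_fence fence;
	int ret;

	/* stack == true: the signal path skips the final dma_fence_put() */
	xe_gt_tlb_invalidation_fence_init(gt, &fence, true);

	ret = xe_gt_tlb_invalidation_range(gt, &fence, start, end, asid);
	if (ret < 0) {
		/* on error, finalize by hand, as xe_gt_tlb_invalidation_ggtt() does */
		xe_gt_tlb_invalidation_fence_fini(&fence);
		return ret;
	}

	/* fence signals from the G2H handler (or the TDR on timeout) */
	xe_gt_tlb_invalidation_fence_wait(&fence);

	return 0;
}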
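The widening logic in xe_gt_tlb_invalidation_range() is easier to check with concrete numbers. The standalone userspace sketch below reproduces the same math (not kernel code; align_down(), align_up() and roundup_pow_of_two() are re-implemented here for power-of-two alignments) for an 8 KiB range at 68 KiB, which widens to a single 16 KiB invalidation aligned at 64 KiB.

#include <stdint.h>
#include <stdio.h>

#define SZ_4K	0x1000ULL
#define SZ_2M	0x200000ULL
#define SZ_16M	0x1000000ULL

static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
static uint64_t align_up(uint64_t x, uint64_t a) { return (x + a - 1) & ~(a - 1); }

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	uint64_t orig_start = 0x11000, end = 0x13000;	/* 8 KiB at 68 KiB */
	uint64_t start = orig_start;
	uint64_t length = end - start;
	uint64_t align;

	if (length < SZ_4K)
		length = SZ_4K;

	/* smallest power-of-two box, aligned down/up around the range */
	align = roundup_pow_of_two(length);
	start = align_down(start, align);
	end = align_up(end, align);
	length = align;

	/* grow until one aligned box of 'length' covers the whole range */
	while (start + length < end) {
		length <<= 1;
		start = align_down(orig_start, length);
	}

	/* lengths between 2 MiB and 16 MiB are rounded up to 16 MiB */
	if (length >= SZ_2M) {
		length = length > SZ_16M ? length : SZ_16M;
		start = align_down(orig_start, length);
	}

	/* prints: start=0x10000 length=0x4000 */
	printf("start=%#llx length=%#llx\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}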