diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 77 |
1 files changed, 68 insertions, 9 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7e1abbbfba12..606922d9dd73 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -25,6 +25,7 @@ #include "xe_ring_ops_types.h" #include "xe_trace.h" #include "xe_vm.h" +#include "xe_pxp.h" enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, @@ -38,6 +39,8 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue static void __xe_exec_queue_free(struct xe_exec_queue *q) { + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); if (q->vm) xe_vm_put(q->vm); @@ -78,6 +81,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); + INIT_LIST_HEAD(&q->pxp.link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -112,6 +116,21 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) { struct xe_vm *vm = q->vm; int i, err; + u32 flags = 0; + + /* + * PXP workloads executing on RCS or CCS must run in isolation (i.e. no + * other workload can use the EUs at the same time). On MTL this is done + * by setting the RUNALONE bit in the LRC, while starting on Xe2 there + * is a dedicated bit for it. + */ + if (xe_exec_queue_uses_pxp(q) && + (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) { + if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20) + flags |= XE_LRC_CREATE_PXP; + else + flags |= XE_LRC_CREATE_RUNALONE; + } if (vm) { err = xe_vm_lock(vm, true); @@ -120,7 +139,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) } for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); goto err_unlock; @@ -153,6 +172,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v struct xe_exec_queue *q; int err; + /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */ + xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0))); + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, extensions); if (IS_ERR(q)) @@ -162,12 +184,26 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (err) goto err_post_alloc; + /* + * We can only add the queue to the PXP list after the init is complete, + * because the PXP termination can call exec_queue_kill and that will + * go bad if the queue is only half-initialized. This means that we + * can't do it when we handle the PXP extension in __xe_exec_queue_alloc + * and we need to do it here instead. + */ + if (xe_exec_queue_uses_pxp(q)) { + err = xe_pxp_exec_queue_add(xe->pxp, q); + if (err) + goto err_post_alloc; + } + return q; err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, @@ -250,6 +286,9 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + xe_exec_queue_last_fence_put_unlocked(q); if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, @@ -405,6 +444,22 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return 0; } +static int +exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value) +{ + if (value == DRM_XE_PXP_TYPE_NONE) + return 0; + + /* we only support HWDRM sessions right now */ + if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM)) + return -EINVAL; + + if (!xe_pxp_is_enabled(xe->pxp)) + return -ENODEV; + + return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -412,6 +467,7 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -431,7 +487,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, ARRAY_SIZE(exec_queue_set_property_funcs)) || XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); @@ -483,7 +540,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, +static u32 calc_validate_logical_mask(struct xe_device *xe, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) { @@ -545,15 +602,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; struct xe_vm *vm; - struct xe_gt *gt; struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; + u32 flags = 0; u32 id; u32 len; int err; - if (XE_IOCTL_DBG(xe, args->flags) || + if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -570,6 +627,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) return -EINVAL; + if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) + flags |= EXEC_QUEUE_FLAG_LOW_LATENCY; + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { if (XE_IOCTL_DBG(xe, args->width != 1) || XE_IOCTL_DBG(xe, args->num_placements != 1) || @@ -578,8 +638,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, for_each_tile(tile, xe, id) { struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; + flags |= EXEC_QUEUE_FLAG_VM; if (id) flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; @@ -598,8 +658,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, &q->multi_gt_link); } } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, + logical_mask = calc_validate_logical_mask(xe, eci, args->width, args->num_placements); if (XE_IOCTL_DBG(xe, !logical_mask)) @@ -626,7 +685,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, + args->width, hwe, flags, args->extensions); up_read(&vm->lock); xe_vm_put(vm); |