From 687db2207b1bc94ca34743871167923a6de78d85 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:42 +0300 Subject: gpu: host1x: Add DMA fence implementation Add an implementation of dma_fences based on syncpoints. Syncpoint interrupts are used to signal fences. Additionally, after software signaling has been enabled, a 30 second timeout is started. If the syncpoint threshold is not reached within this period, the fence is signalled with an -ETIMEDOUT error code. This is to allow fences that would never reach their syncpoint threshold to be cleaned up. The timeout can potentially be removed in the future after job tracking code has been refactored. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9b0487c88571..eb4cc8c964a4 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -170,6 +170,8 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); + /* * host1x channel */ -- cgit v1.2.3 From c78f837ae3d1e532ff4eb90155b42d7a2e892a3f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:43 +0300 Subject: gpu: host1x: Add no-recovery mode Add a new property for jobs to enable or disable recovery i.e. CPU increments of syncpoints to max value on job timeout. This allows for a more solid model for hanged jobs, where userspace doesn't need to guess if a syncpoint increment happened because the job completed, or because job timeout was triggered. On job timeout, we stop the channel, NOP all future jobs on the channel using the same syncpoint, mark the syncpoint as locked and resume the channel from the next job, if any. The future jobs are NOPed, since because we don't do the CPU increments, the value of the syncpoint is no longer synchronized, and any waiters would become confused if a future job incremented the syncpoint. The syncpoint is marked locked to ensure that any future jobs cannot increment the syncpoint either, until the application has recognized the situation and reallocated the syncpoint. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index eb4cc8c964a4..8da088ad0b5e 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -236,9 +236,15 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; + /* Completion waiter ref */ + void *waiter; + /* Maximum time to wait for this job */ unsigned int timeout; + /* Job has timed out and should be released */ + bool cancelled; + /* Index and number of slots used in the push buffer */ unsigned int first_get; unsigned int num_slots; @@ -259,6 +265,9 @@ struct host1x_job { /* Add a channel wait for previous ops to complete */ bool serialize; + + /* Fast-forward syncpoint increments on job timeout */ + bool syncpt_recovery; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, -- cgit v1.2.3 From 17a298e9ac7c011e64a9c0b6f807b43f9af22eac Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:44 +0300 Subject: gpu: host1x: Add job release callback Add a callback field to the job structure, to be called just before the job is to be freed. This allows the job's submitter to clean up any of its own state, like decrement runtime PM refcounts. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 8da088ad0b5e..57271ab1fee8 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -268,6 +268,10 @@ struct host1x_job { /* Fast-forward syncpoint increments on job timeout */ bool syncpt_recovery; + + /* Callback called when job is freed */ + void (*release)(struct host1x_job *job); + void *user_data; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, -- cgit v1.2.3 From e902585fc8b639f1a1258eaa6265e98994e34ef8 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:45 +0300 Subject: gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer Add support for inserting syncpoint waits in the CDMA pushbuffer. These waits need to be done in HOST1X class, while gather submitted by the application execute in engine class. Support is added by converting the gather list of job into a command list that can include both gathers and waits. When the job is submitted, these commands are pushed as the appropriate opcodes on the CDMA pushbuffer. Also supported are waits relative to the start of the job, which are useful for jobs doing multiple things with an engine that doesn't natively support pipelining. While at it, use 32-bit waits on chips that support them. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 57271ab1fee8..2127762fc63d 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -218,8 +218,8 @@ struct host1x_job { struct host1x_client *client; /* Gathers and their memory */ - struct host1x_job_gather *gathers; - unsigned int num_gathers; + struct host1x_job_cmd *cmds; + unsigned int num_cmds; /* Array of handles to be pinned & unpinned */ struct host1x_reloc *relocs; @@ -278,6 +278,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs, u32 num_relocs); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class); struct host1x_job *host1x_job_get(struct host1x_job *job); void host1x_job_put(struct host1x_job *job); int host1x_job_pin(struct host1x_job *job, struct device *dev); -- cgit v1.2.3 From 0fddaa85d66140466df8e848afcda452b7d7b416 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:46 +0300 Subject: gpu: host1x: Add option to skip firewall for a job The new UAPI will have its own firewall, and we don't want to run the firewall in the Host1x driver for those jobs. As such, add a parameter to host1x_job_alloc to specify if we want to skip the firewall in the Host1x driver. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 2127762fc63d..7bccf589aba7 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -272,10 +272,14 @@ struct host1x_job { /* Callback called when job is freed */ void (*release)(struct host1x_job *job); void *user_data; + + /* Whether host1x-side firewall should be ran for this job or not */ + bool enable_firewall; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs); + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset); void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, -- cgit v1.2.3 From 57e203953d150e6304ab6936bd2d9aa2daa687f4 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 10 Jun 2021 14:04:48 +0300 Subject: drm/tegra: Add new UAPI to header Update the tegra_drm.h UAPI header, adding the new proposed UAPI. The old staging UAPI is left in for now, with minor modification to avoid name collisions. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/uapi/drm/tegra_drm.h | 425 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 402 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index c4df3c3668b3..94cfc306d50a 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -1,24 +1,5 @@ -/* - * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) 2012-2020 NVIDIA Corporation */ #ifndef _UAPI_TEGRA_DRM_H_ #define _UAPI_TEGRA_DRM_H_ @@ -29,6 +10,8 @@ extern "C" { #endif +/* Tegra DRM legacy UAPI. Only enabled with STAGING */ + #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) @@ -649,8 +632,8 @@ struct drm_tegra_gem_get_flags { #define DRM_TEGRA_SYNCPT_READ 0x02 #define DRM_TEGRA_SYNCPT_INCR 0x03 #define DRM_TEGRA_SYNCPT_WAIT 0x04 -#define DRM_TEGRA_OPEN_CHANNEL 0x05 -#define DRM_TEGRA_CLOSE_CHANNEL 0x06 +#define DRM_TEGRA_OPEN_CHANNEL 0x05 +#define DRM_TEGRA_CLOSE_CHANNEL 0x06 #define DRM_TEGRA_GET_SYNCPT 0x07 #define DRM_TEGRA_SUBMIT 0x08 #define DRM_TEGRA_GET_SYNCPT_BASE 0x09 @@ -674,6 +657,402 @@ struct drm_tegra_gem_get_flags { #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags) #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags) +/* New Tegra DRM UAPI */ + +/* + * Reported by the driver in the `capabilities` field. + * + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent + * with regard to the system memory. + */ +#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) + +struct drm_tegra_channel_open { + /** + * @host1x_class: [in] + * + * Host1x class of the engine that will be programmed using this + * channel. + */ + __u32 host1x_class; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @context: [out] + * + * Opaque identifier corresponding to the opened channel. + */ + __u32 context; + + /** + * @version: [out] + * + * Version of the engine hardware. This can be used by userspace + * to determine how the engine needs to be programmed. + */ + __u32 version; + + /** + * @capabilities: [out] + * + * Flags describing the hardware capabilities. + */ + __u32 capabilities; + __u32 padding; +}; + +struct drm_tegra_channel_close { + /** + * @context: [in] + * + * Identifier of the channel to close. + */ + __u32 context; + __u32 padding; +}; + +/* + * Mapping flags that can be used to influence how the mapping is created. + * + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access + */ +#define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) +#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) +#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \ + DRM_TEGRA_CHANNEL_MAP_WRITE) + +struct drm_tegra_channel_map { + /** + * @context: [in] + * + * Identifier of the channel to which make memory available for. + */ + __u32 context; + + /** + * @handle: [in] + * + * GEM handle of the memory to map. + */ + __u32 handle; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @mapping: [out] + * + * Identifier corresponding to the mapping, to be used for + * relocations or unmapping later. + */ + __u32 mapping; +}; + +struct drm_tegra_channel_unmap { + /** + * @context: [in] + * + * Channel identifier of the channel to unmap memory from. + */ + __u32 context; + + /** + * @mapping: [in] + * + * Mapping identifier of the memory mapping to unmap. + */ + __u32 mapping; +}; + +/* Submission */ + +/** + * Specify that bit 39 of the patched-in address should be set to switch + * swizzling between Tegra and non-Tegra sector layout on systems that store + * surfaces in system memory in non-Tegra sector layout. + */ +#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) + +struct drm_tegra_submit_buf { + /** + * @mapping: [in] + * + * Identifier of the mapping to use in the submission. + */ + __u32 mapping; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * Information for relocation patching. + */ + struct { + /** + * @target_offset: [in] + * + * Offset from the start of the mapping of the data whose + * address is to be patched into the gather. + */ + __u64 target_offset; + + /** + * @gather_offset_words: [in] + * + * Offset in words from the start of the gather data to + * where the address should be patched into. + */ + __u32 gather_offset_words; + + /** + * @shift: [in] + * + * Number of bits the address should be shifted right before + * patching in. + */ + __u32 shift; + } reloc; +}; + +/** + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` + * buffer. Each GATHER_UPTR command uses successive words from the buffer. + */ +#define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. The threshold is calculated relative to the start of the job. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 + +struct drm_tegra_submit_cmd_gather_uptr { + __u32 words; + __u32 reserved[3]; +}; + +struct drm_tegra_submit_cmd_wait_syncpt { + __u32 id; + __u32 value; + __u32 reserved[2]; +}; + +struct drm_tegra_submit_cmd { + /** + * @type: [in] + * + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* + * defines. + */ + __u32 type; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + union { + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; + __u32 reserved[4]; + }; +}; + +struct drm_tegra_submit_syncpt { + /** + * @id: [in] + * + * ID of the syncpoint that the job will increment. + */ + __u32 id; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @increments: [in] + * + * Number of times the job will increment this syncpoint. + */ + __u32 increments; + + /** + * @value: [out] + * + * Value the syncpoint will have once the job has completed all + * its specified syncpoint increments. + * + * Note that the kernel may increment the syncpoint before or after + * the job. These increments are not reflected in this field. + * + * If the job hangs or times out, not all of the increments may + * get executed. + */ + __u32 value; +}; + +struct drm_tegra_channel_submit { + /** + * @context: [in] + * + * Identifier of the channel to submit this job to. + */ + __u32 context; + + /** + * @num_bufs: [in] + * + * Number of elements in the `bufs_ptr` array. + */ + __u32 num_bufs; + + /** + * @num_cmds: [in] + * + * Number of elements in the `cmds_ptr` array. + */ + __u32 num_cmds; + + /** + * @gather_data_words: [in] + * + * Number of 32-bit words in the `gather_data_ptr` array. + */ + __u32 gather_data_words; + + /** + * @bufs_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_buf structures. + */ + __u64 bufs_ptr; + + /** + * @cmds_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_cmd structures. + */ + __u64 cmds_ptr; + + /** + * @gather_data_ptr: [in] + * + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR + * commands. + */ + __u64 gather_data_ptr; + + /** + * @syncobj_in: [in] + * + * Handle for DRM syncobj that will be waited before submission. + * Ignored if zero. + */ + __u32 syncobj_in; + + /** + * @syncobj_out: [in] + * + * Handle for DRM syncobj that will have its fence replaced with + * the job's completion fence. Ignored if zero. + */ + __u32 syncobj_out; + + /** + * @syncpt_incr: [in,out] + * + * Information about the syncpoint the job will increment. + */ + struct drm_tegra_submit_syncpt syncpt; +}; + +struct drm_tegra_syncpoint_allocate { + /** + * @id: [out] + * + * ID of allocated syncpoint. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_free { + /** + * @id: [in] + * + * ID of syncpoint to free. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_wait { + /** + * @timeout: [in] + * + * Absolute timestamp at which the wait will time out. + */ + __s64 timeout_ns; + + /** + * @id: [in] + * + * ID of syncpoint to wait on. + */ + __u32 id; + + /** + * @threshold: [in] + * + * Threshold to wait for. + */ + __u32 threshold; + + /** + * @value: [out] + * + * Value of the syncpoint upon wait completion. + */ + __u32 value; + + __u32 padding; +}; + +#define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) +#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) +#define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) +#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) +#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) + +#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) +#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) +#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) + #if defined(__cplusplus) } #endif -- cgit v1.2.3