summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/nouveau
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-12 11:32:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-12 11:32:19 -0800
commitcf65598d5909acf5e7b7dc9e21786e386356bc81 (patch)
tree44745a47bfc24f7016ff3c3b1ee6d3b8abf517d7 /drivers/gpu/drm/nouveau
parent70d201a40823acba23899342d62bc2644051ad2e (diff)
parentb76c01f1d950425924ee1c1377760de3c024ef78 (diff)
downloadlinux-cf65598d5909acf5e7b7dc9e21786e386356bc81.tar.gz
linux-cf65598d5909acf5e7b7dc9e21786e386356bc81.tar.bz2
linux-cf65598d5909acf5e7b7dc9e21786e386356bc81.zip
Merge tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "This contains two major new drivers: - imagination is a first driver for Imagination Technologies devices, it only covers very specific devices, but there is hope to grow it - xe is a reboot of the i915 GPU (shares display) side using a more upstream focused development model, and trying to maximise code sharing. It's not enabled for any hw by default, and will hopefully get switched on for Intel's Lunarlake. This also drops a bunch of the old UMS ioctls. It's been dead long enough. amdgpu has a bunch of new color management code that is being used in the Steam Deck. amdgpu also has a new ACPI WBRF interaction to help avoid radio interference. Otherwise it's the usual lots of changes in lots of places. Detailed summary: new drivers: - imagination - new driver for Imagination Technologies GPU - xe - new driver for Intel GPUs using core drm concepts core: - add CLOSE_FB ioctl - remove old UMS ioctls - increase max objects to accomodate AMD color mgmt encoder: - create per-encoder debugfs directory edid: - split out drm_eld - SAD helpers - drop edid_firmware module parameter format-helper: - cache format conversion buffers sched: - move from kthread to workqueue - rename some internals - implement dynamic job-flow control gpuvm: - provide more features to handle GEM objects client: - don't acquire module reference displayport: - add mst path property documentation fdinfo: - alignment fix dma-buf: - add fence timestamp helper - add fence deadline support bridge: - transparent aux-bridge for DP/USB-C - lt8912b: add suspend/resume support and power regulator support panel: - edp: AUO B116XTN02, BOE NT116WHM-N21,836X2, NV116WHM-N49 - chromebook panel support - elida-kd35t133: rework pm - powkiddy RK2023 panel - himax-hx8394: drop prepare/unprepare and shutdown logic - BOE BP101WX1-100, Powkiddy X55, Ampire AM8001280G - Evervision VGG644804, SDC ATNA45AF01 - nv3052c: register docs, init sequence fixes, fascontek FS035VG158 - st7701: Anbernic RG-ARC support - r63353 panel controller - Ilitek ILI9805 panel controller - AUO G156HAN04.0 simplefb: - support memory regions - support power domains amdgpu: - add new 64-bit sequence number infrastructure - add AMD specific color management - ACPI WBRF support for RF interference handling - GPUVM updates - RAS updates - DCN 3.5 updates - Rework PCIe link speed handling - Document GPU reset types - DMUB fixes - eDP fixes - NBIO 7.9/7.11 updates - SubVP updates - XGMI PCIe state dumping for aqua vanjaram - GFX11 golden register updates - enable tunnelling on high pri compute amdkfd: - Migrate TLB flushing logic to amdgpu - Trap handler fixes - Fix restore workers handling on suspend/resume - Fix possible memory leak in pqm_uninit() - support import/export of dma-bufs using GEM handles radeon: - fix possible overflows in command buffer checking - check for errors in ring_lock i915: - reorg display code for reuse in xe driver - fdinfo memory stats printing - DP MST bandwidth mgmt improvements - DP panel replay enabling - MTL C20 phy state verification - MTL DP DSC fractional bpp support - Audio fastset support - use dma_fence interfaces instead of i915_sw_fence - Separate gem and display code - AUX register macro refactoring - Separate display module/device parameters - Move display capabilities debugfs under display - Makefile cleanups - Register cleanups - Move display lock inits under display/ - VLV/CHV DPIO PHY register and interface refactoring - DSI VBT sequence refactoring - C10/C20 PHY PLL hardware readout - DPLL code cleanups - Cleanup PXP plane protection checks - Improve display debug msgs - PSR selective fetch fixes/improvements - DP MST fixes - Xe2LPD FBC restrictions removed - DGFX uses direct VBT pin mapping - more MTL WAs - fix MTL eDP bug - eliminate use of kmap_atomic habanalabs: - sysfs entry to identify a device minor id with debugfs path - sysfs entry to expose device module id - add signed device info retrieval through INFO ioctl - add Gaudi2C device support - pcie reset prepare/done hooks msm: - Add support for SDM670, SM8650 - Handle the CFG interconnect to fix the obscure hangs / timeouts - Kconfig fix for QMP dependency - use managed allocators - DPU: SDM670, SM8650 support - DPU: Enable SmartDMA on SM8350 and SM8450 - DP: enable runtime PM support - GPU: add metadata UAPI - GPU: move devcoredumps to GPU device - GPU: convert to drm_exec ivpu: - update FW API - new debugfs file - a new NOP job submission test mode - improve suspend/resume - PM improvements - MMU PT optimizations - firmware profile frequency support - support for uncached buffers - switch to gem shmem helpers - replace kthread with threaded irqs rockchip: - rk3066_hdmi: convert to atomic - vop2: support nv20 and nv30 - rk3588 support mediatek: - use devm_platform_ioremap_resource - stop using iommu_present - MT8188 VDOSYS1 display support panfrost: - PM improvements - improve interrupt handling as poweroff qaic: - allow to run with single MSI - support host/device time sync - switch to persistent DRM devices exynos: - fix potential error pointer dereference - fix wrong error checking - add missing call to drm_atomic_helper_shutdown omapdrm: - dma-fence lockdep annotation fix tidss: - dma-fence lockdep annotation fix - support for AM62A7 v3d: - BCM2712 - rpi5 support - fdinfo + gputop support - uapi for CPU job handling virtio-gpu: - add context debug name" * tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm: (2340 commits) drm/amd/display: Allow z8/z10 from driver drm/amd/display: fix bandwidth validation failure on DCN 2.1 drm/amdgpu: apply the RV2 system aperture fix to RN/CZN as well drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm drm/amd/display: Fix recent checkpatch errors in amdgpu_dm Revert "drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole" drm/amd/display: avoid stringop-overflow warnings for dp_decide_lane_settings() drm/amd/display: Fix power_helpers.c codestyle drm/amd/display: Fix hdcp_log.h codestyle drm/amd/display: Fix hdcp2_execution.c codestyle drm/amd/display: Fix hdcp_psp.h codestyle drm/amd/display: Fix freesync.c codestyle drm/amd/display: Fix hdcp_psp.c codestyle drm/amd/display: Fix hdcp1_execution.c codestyle drm/amd/pm/smu7: fix a memleak in smu7_hwmgr_backend_init drm/amdkfd: Fix iterator used outside loop in 'kfd_add_peer_prop()' drm/amdgpu: Drop 'fence' check in 'to_amdgpu_amdkfd_fence()' drm/amdkfd: Confirm list is non-empty before utilizing list_first_entry in kfd_topology.c drm/amdgpu: Fix '*fw' from request_firmware() not released in 'amdgpu_ucode_request()' drm/amdgpu: Fix variable 'mca_funcs' dereferenced before NULL check in 'amdgpu_mca_smu_get_mca_entry()' ...
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c19
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c15
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c36
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h19
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c68
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.h6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c207
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h43
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c380
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.h12
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fence.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c1
17 files changed, 429 insertions, 411 deletions
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 118807e38422..8d37a694b772 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -38,7 +38,9 @@
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_fixed.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -945,7 +947,8 @@ nv50_msto_prepare(struct drm_atomic_state *state,
if (ret == 0) {
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index,
payload->vc_start_slot, payload->time_slots,
- payload->pbn, payload->time_slots * mst_state->pbn_div);
+ payload->pbn,
+ payload->time_slots * dfixed_trunc(mst_state->pbn_div));
} else {
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
}
@@ -982,15 +985,14 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
const int clock = crtc_state->adjusted_mode.clock;
asyh->or.bpc = connector->display_info.bpc;
- asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3,
- false);
+ asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4);
}
mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr);
if (IS_ERR(mst_state))
return PTR_ERR(mst_state);
- if (!mst_state->pbn_div) {
+ if (!mst_state->pbn_div.full) {
struct nouveau_encoder *outp = mstc->mstm->outp;
mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr,
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 2edd7bb13fae..a04156ca8390 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
{
struct nouveau_abi16_ntfy *ntfy, *temp;
- /* When a client exits without waiting for it's queued up jobs to
- * finish it might happen that we fault the channel. This is due to
- * drm_file_free() calling drm_gem_release() before the postclose()
- * callback. Hence, we can't tear down this scheduler entity before
- * uvmm mappings are unmapped. Currently, we can't detect this case.
- *
- * However, this should be rare and harmless, since the channel isn't
- * needed anymore.
- */
- nouveau_sched_entity_fini(&chan->sched_entity);
+ /* Cancel all jobs from the entity's queue. */
+ drm_sched_entity_fini(&chan->sched.entity);
- /* wait for all activity to stop before cleaning up */
if (chan->chan)
nouveau_channel_idle(chan->chan);
+ nouveau_sched_fini(&chan->sched);
+
/* cleanup notifier state */
list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
nouveau_abi16_ntfy_fini(chan, ntfy);
@@ -344,8 +337,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
- ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
- drm->sched_wq);
+ ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
+ chan->chan->dma.ib_max);
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 9f538486c10e..1f5e243c0c75 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap;
- struct nouveau_sched_entity sched_entity;
+ struct nouveau_sched sched;
};
struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 280d1d9a559b..00cc7d1abaa3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
* If nouveau_bo_new() allocated this buffer, the GEM object was never
* initialized, so don't attempt to release it.
*/
- if (bo->base.dev)
+ if (bo->base.dev) {
+ /* Gem objects not being shared with other VMs get their
+ * dma_resv from a root GEM object.
+ */
+ if (nvbo->no_share)
+ drm_gem_object_put(nvbo->r_obj);
+
drm_gem_object_release(&bo->base);
- else
+ } else {
dma_resv_fini(&bo->base._resv);
+ }
kfree(nvbo);
}
@@ -1055,17 +1062,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
{
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct drm_gem_object *obj = &bo->base;
struct ttm_resource *old_reg = bo->resource;
struct nouveau_drm_tile *new_tile = NULL;
int ret = 0;
-
if (new_reg->mem_type == TTM_PL_TT) {
ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
if (ret)
return ret;
}
+ drm_gpuvm_bo_gem_evict(obj, evict);
nouveau_bo_move_ntfy(bo, new_reg);
ret = ttm_bo_wait_ctx(bo, ctx);
if (ret)
@@ -1130,6 +1138,7 @@ out:
out_ntfy:
if (ret) {
nouveau_bo_move_ntfy(bo, bo->resource);
+ drm_gpuvm_bo_gem_evict(obj, !evict);
}
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 07f671cf895e..70c551921a9e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,11 @@ struct nouveau_bo {
struct list_head entry;
int pbbo_index;
bool validate_mapped;
+
+ /* Root GEM object we derive the dma_resv of in case this BO is not
+ * shared between VMs.
+ */
+ struct drm_gem_object *r_obj;
bool no_share;
/* GPU address space is independent of CPU word size */
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 50589f982d1a..6f6c31a9937b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
static void
nouveau_cli_fini(struct nouveau_cli *cli)
{
+ struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli);
+
/* All our channels are dead now, which means all the fences they
* own are signalled, and all callback functions have been called.
*
@@ -199,8 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli)
WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli);
- nouveau_uvmm_fini(&cli->uvmm);
- nouveau_sched_entity_fini(&cli->sched_entity);
+ nouveau_sched_fini(&cli->sched);
+ if (uvmm)
+ nouveau_uvmm_fini(uvmm);
nouveau_vmm_fini(&cli->svm);
nouveau_vmm_fini(&cli->vmm);
nvif_mmu_dtor(&cli->mmu);
@@ -307,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
cli->mem = &mems[ret];
- ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched,
- drm->sched_wq);
+ /* Don't pass in the (shared) sched_wq in order to let
+ * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+ *
+ * This is required to ensure that for VM_BIND jobs free_job() work and
+ * run_job() work can always run concurrently and hence, free_job() work
+ * can never stall run_job() work. For EXEC jobs we don't have this
+ * requirement, since EXEC job's free_job() does not require to take any
+ * locks which indirectly or directly are held for allocations
+ * elsewhere.
+ */
+ ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
if (ret)
goto done;
@@ -579,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev)
nvif_parent_ctor(&nouveau_parent, &drm->parent);
drm->master.base.object.parent = &drm->parent;
- ret = nouveau_sched_init(drm);
- if (ret)
+ drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0,
+ WQ_MAX_ACTIVE);
+ if (!drm->sched_wq) {
+ ret = -ENOMEM;
goto fail_alloc;
+ }
ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
if (ret)
- goto fail_sched;
+ goto fail_wq;
ret = nouveau_cli_init(drm, "DRM", &drm->client);
if (ret)
@@ -655,8 +670,8 @@ fail_ttm:
nouveau_cli_fini(&drm->client);
fail_master:
nouveau_cli_fini(&drm->master);
-fail_sched:
- nouveau_sched_fini(drm);
+fail_wq:
+ destroy_workqueue(drm->sched_wq);
fail_alloc:
nvif_parent_dtor(&drm->parent);
kfree(drm);
@@ -708,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev)
}
mutex_unlock(&drm->clients_lock);
- nouveau_sched_fini(drm);
-
nouveau_cli_fini(&drm->client);
nouveau_cli_fini(&drm->master);
+ destroy_workqueue(drm->sched_wq);
nvif_parent_dtor(&drm->parent);
mutex_destroy(&drm->clients_lock);
kfree(drm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index e73a233c6572..8a6d94c8b163 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -93,9 +93,12 @@ struct nouveau_cli {
struct nvif_mmu mmu;
struct nouveau_vmm vmm;
struct nouveau_vmm svm;
- struct nouveau_uvmm uvmm;
+ struct {
+ struct nouveau_uvmm *ptr;
+ bool disabled;
+ } uvmm;
- struct nouveau_sched_entity sched_entity;
+ struct nouveau_sched sched;
const struct nvif_mclass *mem;
@@ -121,10 +124,7 @@ struct nouveau_cli_work {
static inline struct nouveau_uvmm *
nouveau_cli_uvmm(struct nouveau_cli *cli)
{
- if (!cli || !cli->uvmm.vmm.cli)
- return NULL;
-
- return &cli->uvmm;
+ return cli ? cli->uvmm.ptr : NULL;
}
static inline struct nouveau_uvmm *
@@ -258,6 +258,9 @@ struct nouveau_drm {
u64 context_base;
} *runl;
+ /* Workqueue used for channel schedulers. */
+ struct workqueue_struct *sched_wq;
+
/* context for accelerated drm-internal operations */
struct nouveau_channel *cechan;
struct nouveau_channel *channel;
@@ -298,10 +301,6 @@ struct nouveau_drm {
struct mutex lock;
bool component_registered;
} audio;
-
- struct drm_gpu_scheduler sched;
- struct workqueue_struct *sched_wq;
-
};
static inline struct nouveau_drm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 9a5ef574744b..bc5d71b79ab2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: MIT
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
@@ -86,14 +84,12 @@
*/
static int
-nouveau_exec_job_submit(struct nouveau_job *job)
+nouveau_exec_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_cli *cli = job->cli;
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
int ret;
/* Create a new fence, but do not emit yet. */
@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job)
return ret;
nouveau_uvmm_lock(uvmm);
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
- drm_exec_until_all_locked(exec) {
- struct drm_gpuva *va;
-
- drm_gpuvm_for_each_va(va, &uvmm->base) {
- if (unlikely(va == &uvmm->base.kernel_alloc_node))
- continue;
-
- ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret)
- goto err_uvmm_unlock;
- }
+ ret = drm_gpuvm_exec_lock(vme);
+ if (ret) {
+ nouveau_uvmm_unlock(uvmm);
+ return ret;
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_for_each_locked_object(exec, index, obj) {
- struct nouveau_bo *nvbo = nouveau_gem_object(obj);
-
- ret = nouveau_bo_validate(nvbo, true, false);
- if (ret)
- goto err_exec_fini;
+ ret = drm_gpuvm_exec_validate(vme);
+ if (ret) {
+ drm_gpuvm_exec_unlock(vme);
+ return ret;
}
return 0;
-
-err_uvmm_unlock:
- nouveau_uvmm_unlock(uvmm);
-err_exec_fini:
- drm_exec_fini(exec);
- return ret;
-
}
static void
-nouveau_exec_job_armed_submit(struct nouveau_job *job)
+nouveau_exec_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
@@ -192,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+ nouveau_job_done(job);
nouveau_job_free(job);
kfree(exec_job->fence);
@@ -211,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job)
NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
chan->chid);
- nouveau_sched_entity_fini(job->entity);
-
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -259,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
}
}
+ args.file_priv = __args->file_priv;
job->chan = __args->chan;
- args.sched_entity = __args->sched_entity;
- args.file_priv = __args->file_priv;
+ args.sched = __args->sched;
+ /* Plus one to account for the HW fence. */
+ args.credits = job->push.count + 1;
args.in_sync.count = __args->in_sync.count;
args.in_sync.s = __args->in_sync.s;
@@ -415,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
if (ret)
goto out;
- args.sched_entity = &chan16->sched_entity;
+ args.sched = &chan16->sched;
args.file_priv = file_priv;
args.chan = chan;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h
index 5488d337bcc0..9b3b151facfd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.h
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -3,16 +3,12 @@
#ifndef __NOUVEAU_EXEC_H__
#define __NOUVEAU_EXEC_H__
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_sched.h"
struct nouveau_exec_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
-
- struct drm_exec exec;
+ struct nouveau_sched *sched;
struct nouveau_channel *chan;
struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a0d303e5ce3d..49c2bcbef129 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
return 0;
- if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv)
+ if (nvbo->no_share && uvmm &&
+ drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv)
return -EPERM;
ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
if (unlikely(!uvmm))
return -EINVAL;
- resv = &uvmm->resv;
+ resv = drm_gpuvm_resv(&uvmm->base);
}
if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART)))
@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
nvbo->valid_domains &= domain;
+ if (nvbo->no_share) {
+ nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base);
+ drm_gem_object_get(nvbo->r_obj);
+ }
+
*pnvbo = nvbo;
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 23cd43a7fd19..bf2dc7567ea4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev)
return 0;
}
-static int nouveau_platform_remove(struct platform_device *pdev)
+static void nouveau_platform_remove(struct platform_device *pdev)
{
struct drm_device *dev = platform_get_drvdata(pdev);
nouveau_drm_device_remove(dev);
- return 0;
}
#if IS_ENABLED(CONFIG_OF)
@@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = {
.of_match_table = of_match_ptr(nouveau_platform_match),
},
.probe = nouveau_platform_probe,
- .remove = nouveau_platform_remove,
+ .remove_new = nouveau_platform_remove,
};
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 7c376c4ccdcf..dd98f6910f9c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -12,30 +12,28 @@
#include "nouveau_abi16.h"
#include "nouveau_sched.h"
-/* FIXME
- *
- * We want to make sure that jobs currently executing can't be deferred by
- * other jobs competing for the hardware. Otherwise we might end up with job
- * timeouts just because of too many clients submitting too many jobs. We don't
- * want jobs to time out because of system load, but because of the job being
- * too bulky.
- *
- * For now allow for up to 16 concurrent jobs in flight until we know how many
- * rings the hardware can process in parallel.
- */
-#define NOUVEAU_SCHED_HW_SUBMISSIONS 16
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000
+/* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
+ * index to the run-queue array.
+ */
+enum nouveau_sched_priority {
+ NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
+ NOUVEAU_SCHED_PRIORITY_COUNT,
+};
+
int
nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args)
{
- struct nouveau_sched_entity *entity = args->sched_entity;
+ struct nouveau_sched *sched = args->sched;
int ret;
+ INIT_LIST_HEAD(&job->entry);
+
job->file_priv = args->file_priv;
job->cli = nouveau_cli(args->file_priv);
- job->entity = entity;
+ job->sched = sched;
job->sync = args->sync;
job->resv_usage = args->resv_usage;
@@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job,
ret = -ENOMEM;
goto err_free_objs;
}
-
}
- ret = drm_sched_job_init(&job->base, &entity->base, NULL);
+ ret = drm_sched_job_init(&job->base, &sched->entity,
+ args->credits, NULL);
if (ret)
goto err_free_chains;
@@ -109,6 +107,27 @@ return ret;
}
void
+nouveau_job_fini(struct nouveau_job *job)
+{
+ dma_fence_put(job->done_fence);
+ drm_sched_job_cleanup(&job->base);
+
+ job->ops->free(job);
+}
+
+void
+nouveau_job_done(struct nouveau_job *job)
+{
+ struct nouveau_sched *sched = job->sched;
+
+ spin_lock(&sched->job.list.lock);
+ list_del(&job->entry);
+ spin_unlock(&sched->job.list.lock);
+
+ wake_up(&sched->job.wq);
+}
+
+void
nouveau_job_free(struct nouveau_job *job)
{
kfree(job->in_sync.data);
@@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job)
kfree(job->out_sync.chains);
}
-void nouveau_job_fini(struct nouveau_job *job)
-{
- dma_fence_put(job->done_fence);
- drm_sched_job_cleanup(&job->base);
- job->ops->free(job);
-}
-
static int
sync_find_fence(struct nouveau_job *job,
struct drm_nouveau_sync *sync,
@@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job)
int
nouveau_job_submit(struct nouveau_job *job)
{
- struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
+ struct nouveau_sched *sched = job->sched;
struct dma_fence *done_fence = NULL;
+ struct drm_gpuvm_exec vm_exec = {
+ .vm = &nouveau_cli_uvmm(job->cli)->base,
+ .flags = DRM_EXEC_IGNORE_DUPLICATES,
+ .num_fences = 1,
+ };
int ret;
ret = nouveau_job_add_deps(job);
@@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job)
/* Make sure the job appears on the sched_entity's queue in the same
* order as it was submitted.
*/
- mutex_lock(&entity->mutex);
+ mutex_lock(&sched->mutex);
/* Guarantee we won't fail after the submit() callback returned
* successfully.
*/
if (job->ops->submit) {
- ret = job->ops->submit(job);
+ ret = job->ops->submit(job, &vm_exec);
if (ret)
goto err_cleanup;
}
+ /* Submit was successful; add the job to the schedulers job list. */
+ spin_lock(&sched->job.list.lock);
+ list_add(&job->entry, &sched->job.list.head);
+ spin_unlock(&sched->job.list.lock);
+
drm_sched_job_arm(&job->base);
job->done_fence = dma_fence_get(&job->base.s_fence->finished);
if (job->sync)
done_fence = dma_fence_get(job->done_fence);
- /* If a sched job depends on a dma-fence from a job from the same GPU
- * scheduler instance, but a different scheduler entity, the GPU
- * scheduler does only wait for the particular job to be scheduled,
- * rather than for the job to fully complete. This is due to the GPU
- * scheduler assuming that there is a scheduler instance per ring.
- * However, the current implementation, in order to avoid arbitrary
- * amounts of kthreads, has a single scheduler instance while scheduler
- * entities represent rings.
- *
- * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
- * out-fences in order to force the scheduler to wait for full job
- * completion for dependent jobs from different entities and same
- * scheduler instance.
- *
- * There is some work in progress [1] to address the issues of firmware
- * schedulers; once it is in-tree the scheduler topology in Nouveau
- * should be re-worked accordingly.
- *
- * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
- */
- set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
-
if (job->ops->armed_submit)
- job->ops->armed_submit(job);
+ job->ops->armed_submit(job, &vm_exec);
nouveau_job_fence_attach(job);
@@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job)
drm_sched_entity_push_job(&job->base);
- mutex_unlock(&entity->mutex);
+ mutex_unlock(&sched->mutex);
if (done_fence) {
dma_fence_wait(done_fence, true);
@@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job)
return 0;
err_cleanup:
- mutex_unlock(&entity->mutex);
+ mutex_unlock(&sched->mutex);
nouveau_job_fence_attach_cleanup(job);
err:
job->state = NOUVEAU_JOB_SUBMIT_FAILED;
return ret;
}
-bool
-nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
- struct work_struct *work)
-{
- return queue_work(entity->sched_wq, work);
-}
-
static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
@@ -399,50 +392,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
nouveau_job_fini(job);
}
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
- struct drm_gpu_scheduler *sched,
- struct workqueue_struct *sched_wq)
-{
- mutex_init(&entity->mutex);
- spin_lock_init(&entity->job.list.lock);
- INIT_LIST_HEAD(&entity->job.list.head);
- init_waitqueue_head(&entity->job.wq);
-
- entity->sched_wq = sched_wq;
- return drm_sched_entity_init(&entity->base,
- DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
-}
-
-void
-nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
-{
- drm_sched_entity_destroy(&entity->base);
-}
-
static const struct drm_sched_backend_ops nouveau_sched_ops = {
.run_job = nouveau_sched_run_job,
.timedout_job = nouveau_sched_timedout_job,
.free_job = nouveau_sched_free_job,
};
-int nouveau_sched_init(struct nouveau_drm *drm)
+int
+nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit)
{
- struct drm_gpu_scheduler *sched = &drm->sched;
+ struct drm_gpu_scheduler *drm_sched = &sched->base;
+ struct drm_sched_entity *entity = &sched->entity;
long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
+ int ret;
- drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
- if (!drm->sched_wq)
- return -ENOMEM;
+ if (!wq) {
+ wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
+ current->pid);
+ if (!wq)
+ return -ENOMEM;
+
+ sched->wq = wq;
+ }
- return drm_sched_init(sched, &nouveau_sched_ops,
- DRM_SCHED_PRIORITY_COUNT,
- NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
- NULL, NULL, "nouveau_sched", drm->dev->dev);
+ ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
+ NOUVEAU_SCHED_PRIORITY_COUNT,
+ credit_limit, 0, job_hang_limit,
+ NULL, NULL, "nouveau_sched", drm->dev->dev);
+ if (ret)
+ goto fail_wq;
+
+ /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
+ * when we want to have a single run-queue only.
+ *
+ * It's not documented, but one will find out when trying to use any
+ * other priority running into faults, because the scheduler uses the
+ * priority as array index.
+ *
+ * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
+ * matching the enum type used in drm_sched_entity_init().
+ */
+ ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
+ &drm_sched, 1, NULL);
+ if (ret)
+ goto fail_sched;
+
+ mutex_init(&sched->mutex);
+ spin_lock_init(&sched->job.list.lock);
+ INIT_LIST_HEAD(&sched->job.list.head);
+ init_waitqueue_head(&sched->job.wq);
+
+ return 0;
+
+fail_sched:
+ drm_sched_fini(drm_sched);
+fail_wq:
+ if (sched->wq)
+ destroy_workqueue(sched->wq);
+ return ret;
}
-void nouveau_sched_fini(struct nouveau_drm *drm)
+void
+nouveau_sched_fini(struct nouveau_sched *sched)
{
- destroy_workqueue(drm->sched_wq);
- drm_sched_fini(&drm->sched);
+ struct drm_gpu_scheduler *drm_sched = &sched->base;
+ struct drm_sched_entity *entity = &sched->entity;
+
+ rmb(); /* for list_empty to work without lock */
+ wait_event(sched->job.wq, list_empty(&sched->job.list.head));
+
+ drm_sched_entity_fini(entity);
+ drm_sched_fini(drm_sched);
+
+ /* Destroy workqueue after scheduler tear down, otherwise it might still
+ * be in use.
+ */
+ if (sched->wq)
+ destroy_workqueue(sched->wq);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 27ac19792597..a6528f5981e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
-#include <drm/drm_exec.h>
+#include <drm/drm_gpuvm.h>
#include <drm/gpu_scheduler.h>
#include "nouveau_drv.h"
@@ -26,7 +26,8 @@ enum nouveau_job_state {
struct nouveau_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
+ struct nouveau_sched *sched;
+ u32 credits;
enum dma_resv_usage resv_usage;
bool sync;
@@ -49,12 +50,12 @@ struct nouveau_job {
enum nouveau_job_state state;
- struct nouveau_sched_entity *entity;
+ struct nouveau_sched *sched;
+ struct list_head entry;
struct drm_file *file_priv;
struct nouveau_cli *cli;
- struct drm_exec exec;
enum dma_resv_usage resv_usage;
struct dma_fence *done_fence;
@@ -76,8 +77,8 @@ struct nouveau_job {
/* If .submit() returns without any error, it is guaranteed that
* armed_submit() is called.
*/
- int (*submit)(struct nouveau_job *);
- void (*armed_submit)(struct nouveau_job *);
+ int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
+ void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
struct dma_fence *(*run)(struct nouveau_job *);
void (*free)(struct nouveau_job *);
enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *);
@@ -90,20 +91,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args,
int nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args);
-void nouveau_job_free(struct nouveau_job *job);
-
-int nouveau_job_submit(struct nouveau_job *job);
void nouveau_job_fini(struct nouveau_job *job);
+int nouveau_job_submit(struct nouveau_job *job);
+void nouveau_job_done(struct nouveau_job *job);
+void nouveau_job_free(struct nouveau_job *job);
-#define to_nouveau_sched_entity(entity) \
- container_of((entity), struct nouveau_sched_entity, base)
-
-struct nouveau_sched_entity {
- struct drm_sched_entity base;
+struct nouveau_sched {
+ struct drm_gpu_scheduler base;
+ struct drm_sched_entity entity;
+ struct workqueue_struct *wq;
struct mutex mutex;
- struct workqueue_struct *sched_wq;
-
struct {
struct {
struct list_head head;
@@ -113,15 +111,8 @@ struct nouveau_sched_entity {
} job;
};
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
- struct drm_gpu_scheduler *sched,
- struct workqueue_struct *sched_wq);
-void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity);
-
-bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
- struct work_struct *work);
-
-int nouveau_sched_init(struct nouveau_drm *drm);
-void nouveau_sched_fini(struct nouveau_drm *drm);
+int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_fini(struct nouveau_sched *sched);
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 5cf892c50f43..4f223c972c6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -62,6 +62,8 @@ struct bind_job_op {
enum vm_bind_op op;
u32 flags;
+ struct drm_gpuvm_bo *vm_bo;
+
struct {
u64 addr;
u64 range;
@@ -436,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg)
static void
op_map_prepare_unwind(struct nouveau_uvma *uvma)
{
+ struct drm_gpuva *va = &uvma->va;
nouveau_uvma_gem_put(uvma);
- drm_gpuva_remove(&uvma->va);
+ drm_gpuva_remove(va);
nouveau_uvma_free(uvma);
}
@@ -466,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
break;
case DRM_GPUVA_OP_REMAP: {
struct drm_gpuva_op_remap *r = &op->remap;
+ struct drm_gpuva *va = r->unmap->va;
if (r->next)
op_map_prepare_unwind(new->next);
@@ -473,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
if (r->prev)
op_map_prepare_unwind(new->prev);
- op_unmap_prepare_unwind(r->unmap->va);
+ op_unmap_prepare_unwind(va);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -604,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u)
drm_gpuva_unmap(u);
}
+/*
+ * Note: @args should not be NULL when calling for a map operation.
+ */
static int
nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
struct nouveau_uvma_prealloc *new,
@@ -624,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
if (ret)
goto unwind;
- if (args && vmm_get_range) {
+ if (vmm_get_range) {
ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start,
vmm_get_range);
if (ret) {
@@ -632,6 +639,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
goto unwind;
}
}
+
break;
}
case DRM_GPUVA_OP_REMAP: {
@@ -929,25 +937,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm,
static int
nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range)
{
- u64 end = addr + range;
- u64 kernel_managed_end = uvmm->kernel_managed_addr +
- uvmm->kernel_managed_size;
-
if (addr & ~PAGE_MASK)
return -EINVAL;
if (range & ~PAGE_MASK)
return -EINVAL;
- if (end <= addr)
- return -EINVAL;
-
- if (addr < NOUVEAU_VA_SPACE_START ||
- end > NOUVEAU_VA_SPACE_END)
- return -EINVAL;
-
- if (addr < kernel_managed_end &&
- end > uvmm->kernel_managed_addr)
+ if (!drm_gpuvm_range_valid(&uvmm->base, addr, range))
return -EINVAL;
return 0;
@@ -970,6 +966,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref)
{
struct nouveau_uvmm_bind_job *job =
container_of(kref, struct nouveau_uvmm_bind_job, kref);
+ struct bind_job_op *op, *next;
+
+ list_for_each_op_safe(op, next, &job->ops) {
+ list_del(&op->entry);
+ kfree(op);
+ }
nouveau_job_free(&job->base);
kfree(job);
@@ -1011,14 +1013,16 @@ bind_validate_op(struct nouveau_job *job,
static void
bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
{
- struct nouveau_uvmm_bind_job *bind_job;
- struct nouveau_sched_entity *entity = job->entity;
+ struct nouveau_sched *sched = job->sched;
+ struct nouveau_job *__job;
struct bind_job_op *op;
u64 end = addr + range;
again:
- spin_lock(&entity->job.list.lock);
- list_for_each_entry(bind_job, &entity->job.list.head, entry) {
+ spin_lock(&sched->job.list.lock);
+ list_for_each_entry(__job, &sched->job.list.head, entry) {
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
+
list_for_each_op(op, &bind_job->ops) {
if (op->op == OP_UNMAP) {
u64 op_addr = op->va.addr;
@@ -1026,7 +1030,7 @@ again:
if (!(end <= op_addr || addr >= op_end)) {
nouveau_uvmm_bind_job_get(bind_job);
- spin_unlock(&entity->job.list.lock);
+ spin_unlock(&sched->job.list.lock);
wait_for_completion(&bind_job->complete);
nouveau_uvmm_bind_job_put(bind_job);
goto again;
@@ -1034,7 +1038,7 @@ again:
}
}
}
- spin_unlock(&entity->job.list.lock);
+ spin_unlock(&sched->job.list.lock);
}
static int
@@ -1113,22 +1117,28 @@ bind_validate_region(struct nouveau_job *job)
}
static void
-bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
+bind_link_gpuvas(struct bind_job_op *bop)
{
+ struct nouveau_uvma_prealloc *new = &bop->new;
+ struct drm_gpuvm_bo *vm_bo = bop->vm_bo;
+ struct drm_gpuva_ops *ops = bop->ops;
struct drm_gpuva_op *op;
drm_gpuva_for_each_op(op, ops) {
switch (op->op) {
case DRM_GPUVA_OP_MAP:
- drm_gpuva_link(&new->map->va);
+ drm_gpuva_link(&new->map->va, vm_bo);
break;
- case DRM_GPUVA_OP_REMAP:
+ case DRM_GPUVA_OP_REMAP: {
+ struct drm_gpuva *va = op->remap.unmap->va;
+
if (op->remap.prev)
- drm_gpuva_link(&new->prev->va);
+ drm_gpuva_link(&new->prev->va, va->vm_bo);
if (op->remap.next)
- drm_gpuva_link(&new->next->va);
- drm_gpuva_unlink(op->remap.unmap->va);
+ drm_gpuva_link(&new->next->va, va->vm_bo);
+ drm_gpuva_unlink(va);
break;
+ }
case DRM_GPUVA_OP_UNMAP:
drm_gpuva_unlink(op->unmap.va);
break;
@@ -1139,21 +1149,70 @@ bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
}
static int
-nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
+bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+ struct bind_job_op *op;
+ int ret;
+
+ list_for_each_op(op, &bind_job->ops) {
+ struct drm_gpuva_op *va_op;
+
+ if (!op->ops)
+ continue;
+
+ drm_gpuva_for_each_op(va_op, op->ops) {
+ struct drm_gem_object *obj = op_gem_obj(va_op);
+
+ if (unlikely(!obj))
+ continue;
+
+ ret = drm_exec_prepare_obj(exec, obj, num_fences);
+ if (ret)
+ return ret;
+
+ /* Don't validate GEMs backing mappings we're about to
+ * unmap, it's not worth the effort.
+ */
+ if (va_op->op == DRM_GPUVA_OP_UNMAP)
+ continue;
+
+ ret = nouveau_bo_validate(nouveau_gem_object(obj),
+ true, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
- struct nouveau_sched_entity *entity = job->entity;
- struct drm_exec *exec = &job->exec;
+ struct drm_exec *exec = &vme->exec;
struct bind_job_op *op;
int ret;
list_for_each_op(op, &bind_job->ops) {
if (op->op == OP_MAP) {
- op->gem.obj = drm_gem_object_lookup(job->file_priv,
- op->gem.handle);
- if (!op->gem.obj)
+ struct drm_gem_object *obj = op->gem.obj =
+ drm_gem_object_lookup(job->file_priv,
+ op->gem.handle);
+ if (!obj)
return -ENOENT;
+
+ dma_resv_lock(obj->resv, NULL);
+ op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj);
+ dma_resv_unlock(obj->resv);
+ if (IS_ERR(op->vm_bo))
+ return PTR_ERR(op->vm_bo);
+
+ drm_gpuvm_bo_extobj_add(op->vm_bo);
}
ret = bind_validate_op(job, op);
@@ -1176,6 +1235,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
* unwind all GPU VA space changes on failure.
*/
nouveau_uvmm_lock(uvmm);
+
list_for_each_op(op, &bind_job->ops) {
switch (op->op) {
case OP_MAP_SPARSE:
@@ -1287,55 +1347,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}
}
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(exec, vme->flags, 0);
drm_exec_until_all_locked(exec) {
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
-
- if (unlikely(!obj))
- continue;
-
- ret = drm_exec_prepare_obj(exec, obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
- }
- }
- }
-
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
-
- if (unlikely(!obj))
- continue;
-
- /* Don't validate GEMs backing mappings we're about to
- * unmap, it's not worth the effort.
- */
- if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP))
- continue;
-
- ret = nouveau_bo_validate(nouveau_gem_object(obj),
- true, false);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
+ ret = bind_lock_validate(job, exec, vme->num_fences);
+ drm_exec_retry_on_contention(exec);
+ if (ret) {
+ op = list_last_op(&bind_job->ops);
+ goto unwind;
}
}
@@ -1364,7 +1382,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
case OP_UNMAP_SPARSE:
case OP_MAP:
case OP_UNMAP:
- bind_link_gpuvas(op->ops, &op->new);
+ bind_link_gpuvas(op);
break;
default:
break;
@@ -1372,10 +1390,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}
nouveau_uvmm_unlock(uvmm);
- spin_lock(&entity->job.list.lock);
- list_add(&bind_job->entry, &entity->job.list.head);
- spin_unlock(&entity->job.list.lock);
-
return 0;
unwind_continue:
@@ -1410,21 +1424,17 @@ unwind:
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_fini(exec);
+ drm_gpuvm_exec_unlock(vme);
return ret;
}
static void
-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job)
+nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
@@ -1462,14 +1472,11 @@ out:
}
static void
-nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
+nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job)
{
- struct nouveau_uvmm_bind_job *bind_job =
- container_of(work, struct nouveau_uvmm_bind_job, work);
- struct nouveau_job *job = &bind_job->base;
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
- struct nouveau_sched_entity *entity = job->entity;
- struct bind_job_op *op, *next;
+ struct bind_job_op *op;
list_for_each_op(op, &bind_job->ops) {
struct drm_gem_object *obj = op->gem.obj;
@@ -1511,42 +1518,27 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
if (!IS_ERR_OR_NULL(op->ops))
drm_gpuva_ops_free(&uvmm->base, op->ops);
+ if (!IS_ERR_OR_NULL(op->vm_bo)) {
+ dma_resv_lock(obj->resv, NULL);
+ drm_gpuvm_bo_put(op->vm_bo);
+ dma_resv_unlock(obj->resv);
+ }
+
if (obj)
drm_gem_object_put(obj);
}
- spin_lock(&entity->job.list.lock);
- list_del(&bind_job->entry);
- spin_unlock(&entity->job.list.lock);
-
+ nouveau_job_done(job);
complete_all(&bind_job->complete);
- wake_up(&entity->job.wq);
-
- /* Remove and free ops after removing the bind job from the job list to
- * avoid races against bind_validate_map_sparse().
- */
- list_for_each_op_safe(op, next, &bind_job->ops) {
- list_del(&op->entry);
- kfree(op);
- }
nouveau_uvmm_bind_job_put(bind_job);
}
-static void
-nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job)
-{
- struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
- struct nouveau_sched_entity *entity = job->entity;
-
- nouveau_sched_entity_qwork(entity, &bind_job->work);
-}
-
static struct nouveau_job_ops nouveau_bind_job_ops = {
.submit = nouveau_uvmm_bind_job_submit,
.armed_submit = nouveau_uvmm_bind_job_armed_submit,
.run = nouveau_uvmm_bind_job_run,
- .free = nouveau_uvmm_bind_job_free_qwork,
+ .free = nouveau_uvmm_bind_job_cleanup,
};
static int
@@ -1607,7 +1599,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
return ret;
INIT_LIST_HEAD(&job->ops);
- INIT_LIST_HEAD(&job->entry);
for (i = 0; i < __args->op.count; i++) {
ret = bind_job_op_from_uop(&op, &__args->op.s[i]);
@@ -1618,11 +1609,12 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
}
init_completion(&job->complete);
- INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn);
- args.sched_entity = __args->sched_entity;
args.file_priv = __args->file_priv;
+ args.sched = __args->sched;
+ args.credits = 1;
+
args.in_sync.count = __args->in_sync.count;
args.in_sync.s = __args->in_sync.s;
@@ -1648,18 +1640,6 @@ err_free:
return ret;
}
-int
-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
- void *data,
- struct drm_file *file_priv)
-{
- struct nouveau_cli *cli = nouveau_cli(file_priv);
- struct drm_nouveau_vm_init *init = data;
-
- return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr,
- init->kernel_managed_size);
-}
-
static int
nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args)
{
@@ -1760,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
if (ret)
return ret;
- args.sched_entity = &cli->sched_entity;
+ args.sched = &cli->sched;
args.file_priv = file_priv;
ret = nouveau_uvmm_vm_bind(&args);
@@ -1776,15 +1756,18 @@ void
nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem)
{
struct drm_gem_object *obj = &nvbo->bo.base;
+ struct drm_gpuvm_bo *vm_bo;
struct drm_gpuva *va;
dma_resv_assert_held(obj->resv);
- drm_gem_for_each_gpuva(va, obj) {
- struct nouveau_uvma *uvma = uvma_from_va(va);
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ drm_gpuvm_bo_for_each_va(va, vm_bo) {
+ struct nouveau_uvma *uvma = uvma_from_va(va);
- nouveau_uvma_map(uvma, mem);
- drm_gpuva_invalidate(va, false);
+ nouveau_uvma_map(uvma, mem);
+ drm_gpuva_invalidate(va, false);
+ }
}
}
@@ -1792,29 +1775,62 @@ void
nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
{
struct drm_gem_object *obj = &nvbo->bo.base;
+ struct drm_gpuvm_bo *vm_bo;
struct drm_gpuva *va;
dma_resv_assert_held(obj->resv);
- drm_gem_for_each_gpuva(va, obj) {
- struct nouveau_uvma *uvma = uvma_from_va(va);
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ drm_gpuvm_bo_for_each_va(va, vm_bo) {
+ struct nouveau_uvma *uvma = uvma_from_va(va);
- nouveau_uvma_unmap(uvma);
- drm_gpuva_invalidate(va, true);
+ nouveau_uvma_unmap(uvma);
+ drm_gpuva_invalidate(va, true);
+ }
}
}
+static void
+nouveau_uvmm_free(struct drm_gpuvm *gpuvm)
+{
+ struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm);
+
+ kfree(uvmm);
+}
+
+static int
+nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+ struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj);
+
+ return nouveau_bo_validate(nvbo, true, false);
+}
+
+static const struct drm_gpuvm_ops gpuvm_ops = {
+ .vm_free = nouveau_uvmm_free,
+ .vm_bo_validate = nouveau_uvmm_bo_validate,
+};
+
int
-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size)
+nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
+ void *data,
+ struct drm_file *file_priv)
{
+ struct nouveau_uvmm *uvmm;
+ struct nouveau_cli *cli = nouveau_cli(file_priv);
+ struct drm_device *drm = cli->drm->dev;
+ struct drm_gem_object *r_obj;
+ struct drm_nouveau_vm_init *init = data;
+ u64 kernel_managed_end;
int ret;
- u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
- mutex_init(&uvmm->mutex);
- dma_resv_init(&uvmm->resv);
- mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
- mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+ if (check_add_overflow(init->kernel_managed_addr,
+ init->kernel_managed_size,
+ &kernel_managed_end))
+ return -EINVAL;
+
+ if (kernel_managed_end > NOUVEAU_VA_SPACE_END)
+ return -EINVAL;
mutex_lock(&cli->mutex);
@@ -1823,39 +1839,48 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
goto out_unlock;
}
- if (kernel_managed_end <= kernel_managed_addr) {
- ret = -EINVAL;
+ uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL);
+ if (!uvmm) {
+ ret = -ENOMEM;
goto out_unlock;
}
- if (kernel_managed_end > NOUVEAU_VA_SPACE_END) {
- ret = -EINVAL;
+ r_obj = drm_gpuvm_resv_object_alloc(drm);
+ if (!r_obj) {
+ kfree(uvmm);
+ ret = -ENOMEM;
goto out_unlock;
}
- uvmm->kernel_managed_addr = kernel_managed_addr;
- uvmm->kernel_managed_size = kernel_managed_size;
+ mutex_init(&uvmm->mutex);
+ mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
- drm_gpuvm_init(&uvmm->base, cli->name,
+ drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
- kernel_managed_addr, kernel_managed_size,
- NULL);
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
+ &gpuvm_ops);
+ /* GPUVM takes care from here on. */
+ drm_gem_object_put(r_obj);
ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
cli->vmm.vmm.object.oclass, RAW,
- kernel_managed_addr, kernel_managed_size,
- NULL, 0, &cli->uvmm.vmm.vmm);
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
+ NULL, 0, &uvmm->vmm.vmm);
if (ret)
- goto out_free_gpuva_mgr;
+ goto out_gpuvm_fini;
- cli->uvmm.vmm.cli = cli;
+ uvmm->vmm.cli = cli;
+ cli->uvmm.ptr = uvmm;
mutex_unlock(&cli->mutex);
return 0;
-out_free_gpuva_mgr:
- drm_gpuvm_destroy(&uvmm->base);
+out_gpuvm_fini:
+ drm_gpuvm_put(&uvmm->base);
out_unlock:
mutex_unlock(&cli->mutex);
return ret;
@@ -1867,15 +1892,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
MA_STATE(mas, &uvmm->region_mt, 0, 0);
struct nouveau_uvma_region *reg;
struct nouveau_cli *cli = uvmm->vmm.cli;
- struct nouveau_sched_entity *entity = &cli->sched_entity;
struct drm_gpuva *va, *next;
- if (!cli)
- return;
-
- rmb(); /* for list_empty to work without lock */
- wait_event(entity->job.wq, list_empty(&entity->job.list.head));
-
nouveau_uvmm_lock(uvmm);
drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) {
struct nouveau_uvma *uvma = uvma_from_va(va);
@@ -1910,8 +1928,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
mutex_lock(&cli->mutex);
nouveau_vmm_fini(&uvmm->vmm);
- drm_gpuvm_destroy(&uvmm->base);
+ drm_gpuvm_put(&uvmm->base);
mutex_unlock(&cli->mutex);
-
- dma_resv_fini(&uvmm->resv);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index a308c59760a5..9d3c348581eb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,12 +12,6 @@ struct nouveau_uvmm {
struct nouveau_vmm vmm;
struct maple_tree region_mt;
struct mutex mutex;
- struct dma_resv resv;
-
- u64 kernel_managed_addr;
- u64 kernel_managed_size;
-
- bool disabled;
};
struct nouveau_uvma_region {
@@ -50,8 +44,6 @@ struct nouveau_uvmm_bind_job {
struct nouveau_job base;
struct kref kref;
- struct list_head entry;
- struct work_struct work;
struct completion complete;
/* struct bind_job_op */
@@ -60,7 +52,7 @@ struct nouveau_uvmm_bind_job {
struct nouveau_uvmm_bind_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
+ struct nouveau_sched *sched;
unsigned int flags;
@@ -82,8 +74,6 @@ struct nouveau_uvmm_bind_job_args {
#define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base)
-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size);
void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm);
void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem);
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 5b71a5a5cd85..cdbc75e3d1f6 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -39,7 +39,7 @@ struct nv04_fence_priv {
static int
nv04_fence_emit(struct nouveau_fence *fence)
{
- struct nvif_push *push = fence->channel->chan.push;
+ struct nvif_push *push = unrcu_pointer(fence->channel)->chan.push;
int ret = PUSH_WAIT(push, 2);
if (ret == 0) {
PUSH_NVSQ(push, NV_SW, 0x0150, fence->base.seqno);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index 87a62d4ff4bd..7d4716dcd512 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -24,7 +24,6 @@
#include "chan.h"
#include "chid.h"
#include "cgrp.h"
-#include "chid.h"
#include "runl.h"
#include "priv.h"