diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2016-01-12 11:01:12 +0100 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2016-01-12 11:01:12 +0100 |
commit | 1f16f116b01c110db20ab808562c8b8bc3ee3d6e (patch) | |
tree | 44db563f64cf5f8d62af8f99a61e2b248c44ea3a /drivers/gpu/drm/amd | |
parent | 03724ac3d48f8f0e3caf1d30fa134f8fd96c94e2 (diff) | |
parent | f9eccf24615672896dc13251410c3f2f33a14f95 (diff) | |
download | linux-1f16f116b01c110db20ab808562c8b8bc3ee3d6e.tar.gz linux-1f16f116b01c110db20ab808562c8b8bc3ee3d6e.tar.bz2 linux-1f16f116b01c110db20ab808562c8b8bc3ee3d6e.zip |
Merge branches 'clockevents/4.4-fixes' and 'clockevents/4.5-fixes' of http://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
Pull in fixes from Daniel Lezcano:
- Fix the vt8500 timer leading to a system lock up when dealing with too
small delta (Roman Volkov)
- Select the CLKSRC_MMIO when the fsl_ftm_timer is enabled with COMPILE_TEST
(Daniel Lezcano)
- Prevent to compile timers using the 'iomem' API when the architecture has
not HAS_IOMEM set (Richard Weinberger)
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 67 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 108 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 27 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 24 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 127 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/scheduler/sched_fence.c | 13 |
19 files changed, 379 insertions, 142 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 306f75700bf8..048cfe073dae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -496,6 +496,7 @@ struct amdgpu_bo_va_mapping { /* bo virtual addresses in a specific vm */ struct amdgpu_bo_va { + struct mutex mutex; /* protected by bo being reserved */ struct list_head bo_list; struct fence *last_pt_update; @@ -538,6 +539,7 @@ struct amdgpu_bo { /* Constant after initialization */ struct amdgpu_device *adev; struct drm_gem_object gem_base; + struct amdgpu_bo *parent; struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; @@ -928,8 +930,6 @@ struct amdgpu_vm_id { }; struct amdgpu_vm { - struct mutex mutex; - struct rb_root va; /* protecting invalidated */ @@ -956,6 +956,8 @@ struct amdgpu_vm { struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; /* for interval tree */ spinlock_t it_lock; + /* protecting freed */ + spinlock_t freed_lock; }; struct amdgpu_vm_manager { @@ -1262,7 +1264,8 @@ struct amdgpu_cs_parser { struct ww_acquire_ctx ticket; /* user fence */ - struct amdgpu_user_fence uf; + struct amdgpu_user_fence uf; + struct amdgpu_bo_list_entry uf_entry; }; struct amdgpu_job { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3afcf0237c25..25a3e2485cc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -127,6 +127,37 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, return 0; } +static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *fence_data) +{ + struct drm_gem_object *gobj; + uint32_t handle; + + handle = fence_data->handle; + gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, + fence_data->handle); + if (gobj == NULL) + return -EINVAL; + + p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + p->uf.offset = fence_data->offset; + + if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) { + drm_gem_object_unreference_unlocked(gobj); + return -EINVAL; + } + + p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo); + p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + p->uf_entry.priority = 0; + p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; + p->uf_entry.tv.shared = true; + + drm_gem_object_unreference_unlocked(gobj); + return 0; +} + int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { union drm_amdgpu_cs *cs = data; @@ -207,26 +238,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) case AMDGPU_CHUNK_ID_FENCE: size = sizeof(struct drm_amdgpu_cs_chunk_fence); - if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) { - uint32_t handle; - struct drm_gem_object *gobj; - struct drm_amdgpu_cs_chunk_fence *fence_data; - - fence_data = (void *)p->chunks[i].kdata; - handle = fence_data->handle; - gobj = drm_gem_object_lookup(p->adev->ddev, - p->filp, handle); - if (gobj == NULL) { - ret = -EINVAL; - goto free_partial_kdata; - } - - p->uf.bo = gem_to_amdgpu_bo(gobj); - p->uf.offset = fence_data->offset; - } else { + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { ret = -EINVAL; goto free_partial_kdata; } + + ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata); + if (ret) + goto free_partial_kdata; + break; case AMDGPU_CHUNK_ID_DEPENDENCIES: @@ -389,6 +409,9 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm, &p->validated); + if (p->uf.bo) + list_add(&p->uf_entry.tv.head, &p->validated); + if (need_mmap_lock) down_read(¤t->mm->mmap_sem); @@ -486,8 +509,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->num_ibs; i++) amdgpu_ib_free(parser->adev, &parser->ibs[i]); kfree(parser->ibs); - if (parser->uf.bo) - drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); + amdgpu_bo_unref(&parser->uf.bo); + amdgpu_bo_unref(&parser->uf_entry.robj); } static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, @@ -776,7 +799,7 @@ static int amdgpu_cs_free_job(struct amdgpu_job *job) amdgpu_ib_free(job->adev, &job->ibs[i]); kfree(job->ibs); if (job->uf.bo) - drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base); + amdgpu_bo_unref(&job->uf.bo); return 0; } @@ -784,8 +807,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; union drm_amdgpu_cs *cs = data; - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; int i, r; @@ -803,7 +824,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_handle_lockup(adev, r); return r; } - mutex_lock(&vm->mutex); r = amdgpu_cs_parser_relocs(&parser); if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); @@ -888,7 +908,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); - mutex_unlock(&vm->mutex); r = amdgpu_cs_handle_lockup(adev, r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index e173a5a02f0d..5580d3420c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -73,6 +73,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work) struct drm_crtc *crtc = &amdgpuCrtc->base; unsigned long flags; unsigned i; + int vpos, hpos, stat, min_udelay; + struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; amdgpu_flip_wait_fence(adev, &work->excl); for (i = 0; i < work->shared_count; ++i) @@ -81,6 +83,41 @@ static void amdgpu_flip_work_func(struct work_struct *__work) /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); + /* If this happens to execute within the "virtually extended" vblank + * interval before the start of the real vblank interval then it needs + * to delay programming the mmio flip until the real vblank is entered. + * This prevents completing a flip too early due to the way we fudge + * our vblank counter and vblank timestamps in order to work around the + * problem that the hw fires vblank interrupts before actual start of + * vblank (when line buffer refilling is done for a frame). It + * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for + * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts. + * + * In practice this won't execute very often unless on very fast + * machines because the time window for this to happen is very small. + */ + for (;;) { + /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank + * start in hpos, and to the "fudged earlier" vblank start in + * vpos. + */ + stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, + GET_DISTANCE_TO_VBLANKSTART, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode); + + if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) || + !(vpos >= 0 && hpos <= 0)) + break; + + /* Sleep at least until estimated real start of hw vblank */ + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); + usleep_range(min_udelay, 2 * min_udelay); + spin_lock_irqsave(&crtc->dev->event_lock, flags); + }; + /* do the flip (mmio) */ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); /* set the flip status */ @@ -109,7 +146,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) } else DRM_ERROR("failed to reserve buffer after flip\n"); - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + amdgpu_bo_unref(&work->old_rbo); kfree(work->shared); kfree(work); } @@ -148,8 +185,8 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, obj = old_amdgpu_fb->obj; /* take a reference to the old object */ - drm_gem_object_reference(obj); work->old_rbo = gem_to_amdgpu_bo(obj); + amdgpu_bo_ref(work->old_rbo); new_amdgpu_fb = to_amdgpu_framebuffer(fb); obj = new_amdgpu_fb->obj; @@ -222,7 +259,7 @@ pflip_cleanup: amdgpu_bo_unreserve(new_rbo); cleanup: - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + amdgpu_bo_unref(&work->old_rbo); fence_put(work->excl); for (i = 0; i < work->shared_count; ++i) fence_put(work->shared[i]); @@ -712,6 +749,15 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * \param dev Device to query. * \param pipe Crtc to query. * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). + * For driver internal use only also supports these flags: + * + * USE_REAL_VBLANKSTART to use the real start of vblank instead + * of a fudged earlier start of vblank. + * + * GET_DISTANCE_TO_VBLANKSTART to return distance to the + * fudged earlier start of vblank in *vpos and the distance + * to true start of vblank in *hpos. + * * \param *vpos Location where vertical scanout position should be stored. * \param *hpos Location where horizontal scanout position should go. * \param *stime Target location for timestamp taken immediately before @@ -776,10 +822,40 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, vbl_end = 0; } + /* Called from driver internal vblank counter query code? */ + if (flags & GET_DISTANCE_TO_VBLANKSTART) { + /* Caller wants distance from real vbl_start in *hpos */ + *hpos = *vpos - vbl_start; + } + + /* Fudge vblank to start a few scanlines earlier to handle the + * problem that vblank irqs fire a few scanlines before start + * of vblank. Some driver internal callers need the true vblank + * start to be used and signal this via the USE_REAL_VBLANKSTART flag. + * + * The cause of the "early" vblank irq is that the irq is triggered + * by the line buffer logic when the line buffer read position enters + * the vblank, whereas our crtc scanout position naturally lags the + * line buffer read position. + */ + if (!(flags & USE_REAL_VBLANKSTART)) + vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines; + /* Test scanout position against vblank region. */ if ((*vpos < vbl_start) && (*vpos >= vbl_end)) in_vbl = false; + /* In vblank? */ + if (in_vbl) + ret |= DRM_SCANOUTPOS_IN_VBLANK; + + /* Called from driver internal vblank counter query code? */ + if (flags & GET_DISTANCE_TO_VBLANKSTART) { + /* Caller wants distance from fudged earlier vbl_start */ + *vpos -= vbl_start; + return ret; + } + /* Check if inside vblank area and apply corrective offsets: * vpos will then be >=0 in video scanout area, but negative * within vblank area, counting down the number of lines until @@ -795,32 +871,6 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, /* Correct for shifted end of vbl at vbl_end. */ *vpos = *vpos - vbl_end; - /* In vblank? */ - if (in_vbl) - ret |= DRM_SCANOUTPOS_IN_VBLANK; - - /* Is vpos outside nominal vblank area, but less than - * 1/100 of a frame height away from start of vblank? - * If so, assume this isn't a massively delayed vblank - * interrupt, but a vblank interrupt that fired a few - * microseconds before true start of vblank. Compensate - * by adding a full frame duration to the final timestamp. - * Happens, e.g., on ATI R500, R600. - * - * We only do this if DRM_CALLED_FROM_VBLIRQ. - */ - if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) { - vbl_start = mode->crtc_vdisplay; - vtotal = mode->crtc_vtotal; - - if (vbl_start - *vpos < vtotal / 100) { - *vpos -= vtotal; - - /* Signal this correction as "applied". */ - ret |= 0x8; - } - } - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 00c5b580f56c..9c253c535d26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -115,12 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; int r; - mutex_lock(&vm->mutex); r = amdgpu_bo_reserve(rbo, false); - if (r) { - mutex_unlock(&vm->mutex); + if (r) return r; - } bo_va = amdgpu_vm_bo_find(vm, rbo); if (!bo_va) { @@ -129,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri ++bo_va->ref_count; } amdgpu_bo_unreserve(rbo); - mutex_unlock(&vm->mutex); return 0; } @@ -142,10 +138,8 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; int r; - mutex_lock(&vm->mutex); r = amdgpu_bo_reserve(rbo, true); if (r) { - mutex_unlock(&vm->mutex); dev_err(adev->dev, "leaking bo va because " "we fail to reserve bo (%d)\n", r); return; @@ -157,7 +151,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, } } amdgpu_bo_unreserve(rbo); - mutex_unlock(&vm->mutex); } static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) @@ -242,8 +235,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_USERPTR_REGISTER)) return -EINVAL; - if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || - !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) { + if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && ( + !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || + !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) { /* if we want to write to it we must require anonymous memory and install a MMU notifier */ @@ -483,6 +477,14 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (domain == AMDGPU_GEM_DOMAIN_CPU) goto error_unreserve; } + list_for_each_entry(entry, &duplicates, head) { + domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type); + /* if anything is swapped out don't swap it in here, + just abort and wait for the next CS */ + if (domain == AMDGPU_GEM_DOMAIN_CPU) + goto error_unreserve; + } + r = amdgpu_vm_update_page_directory(adev, bo_va->vm); if (r) goto error_unreserve; @@ -553,7 +555,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, gobj = drm_gem_object_lookup(dev, filp, args->handle); if (gobj == NULL) return -ENOENT; - mutex_lock(&fpriv->vm.mutex); rbo = gem_to_amdgpu_bo(gobj); INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); @@ -568,7 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); if (r) { - mutex_unlock(&fpriv->vm.mutex); drm_gem_object_unreference_unlocked(gobj); return r; } @@ -577,7 +577,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (!bo_va) { ttm_eu_backoff_reservation(&ticket, &list); drm_gem_object_unreference_unlocked(gobj); - mutex_unlock(&fpriv->vm.mutex); return -ENOENT; } @@ -602,7 +601,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ttm_eu_backoff_reservation(&ticket, &list); if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) amdgpu_gem_va_update_vm(adev, bo_va, args->operation); - mutex_unlock(&fpriv->vm.mutex); + drm_gem_object_unreference_unlocked(gobj); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1618e2294a16..e23843f4d877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -611,13 +611,59 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev, u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; + int vpos, hpos, stat; + u32 count; if (pipe >= adev->mode_info.num_crtc) { DRM_ERROR("Invalid crtc %u\n", pipe); return -EINVAL; } - return amdgpu_display_vblank_get_counter(adev, pipe); + /* The hw increments its frame counter at start of vsync, not at start + * of vblank, as is required by DRM core vblank counter handling. + * Cook the hw count here to make it appear to the caller as if it + * incremented at start of vblank. We measure distance to start of + * vblank in vpos. vpos therefore will be >= 0 between start of vblank + * and start of vsync, so vpos >= 0 means to bump the hw frame counter + * result by 1 to give the proper appearance to caller. + */ + if (adev->mode_info.crtcs[pipe]) { + /* Repeat readout if needed to provide stable result if + * we cross start of vsync during the queries. + */ + do { + count = amdgpu_display_vblank_get_counter(adev, pipe); + /* Ask amdgpu_get_crtc_scanoutpos to return vpos as + * distance to start of vblank, instead of regular + * vertical scanout pos. + */ + stat = amdgpu_get_crtc_scanoutpos( + dev, pipe, GET_DISTANCE_TO_VBLANKSTART, + &vpos, &hpos, NULL, NULL, + &adev->mode_info.crtcs[pipe]->base.hwmode); + } while (count != amdgpu_display_vblank_get_counter(adev, pipe)); + + if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) { + DRM_DEBUG_VBL("Query failed! stat %d\n", stat); + } else { + DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n", + pipe, vpos); + + /* Bump counter if we are at >= leading edge of vblank, + * but before vsync where vpos would turn negative and + * the hw counter really increments. + */ + if (vpos >= 0) + count++; + } + } else { + /* Fallback to use value as is. */ + count = amdgpu_display_vblank_get_counter(adev, pipe); + DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n"); + } + + return count; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index b62c1710cab6..064ebb347074 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -407,6 +407,7 @@ struct amdgpu_crtc { u32 line_time; u32 wm_low; u32 wm_high; + u32 lb_vblank_lead_lines; struct drm_display_mode hw_mode; }; @@ -528,6 +529,10 @@ struct amdgpu_framebuffer { #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) +/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +#define USE_REAL_VBLANKSTART (1 << 30) +#define GET_DISTANCE_TO_VBLANKSTART (1 << 31) + void amdgpu_link_encoder_connector(struct drm_device *dev); struct drm_connector * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0d524384ff79..c3ce103b6a33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -100,6 +100,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(&bo->list); mutex_unlock(&bo->adev->gem.mutex); drm_gem_object_release(&bo->gem_base); + amdgpu_bo_unref(&bo->parent); kfree(bo->metadata); kfree(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d4bac5f49939..8a1752ff3d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -587,9 +587,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); int r; - if (gtt->userptr) - amdgpu_ttm_tt_pin_userptr(ttm); - + if (gtt->userptr) { + r = amdgpu_ttm_tt_pin_userptr(ttm); + if (r) { + DRM_ERROR("failed to pin userptr\n"); + return r; + } + } gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", @@ -797,11 +801,12 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; - if (mem && mem->mem_type == TTM_PL_TT) + if (mem && mem->mem_type == TTM_PL_TT) { flags |= AMDGPU_PTE_SYSTEM; - if (!ttm || ttm->caching_state == tt_cached) - flags |= AMDGPU_PTE_SNOOPED; + if (ttm->caching_state == tt_cached) + flags |= AMDGPU_PTE_SNOOPED; + } if (adev->asic_type >= CHIP_TOPAZ) flags |= AMDGPU_PTE_EXECUTABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 03f0c3bae516..a745eeeb5d82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -392,7 +392,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = 0x00000030; /* len */ + if ((ring->adev->vce.fw_version >> 24) >= 52) + ib->ptr[ib->length_dw++] = 0x00000040; /* len */ + else + ib->ptr[ib->length_dw++] = 0x00000030; /* len */ ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ ib->ptr[ib->length_dw++] = 0x00000000; ib->ptr[ib->length_dw++] = 0x00000042; @@ -404,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000100; ib->ptr[ib->length_dw++] = 0x0000000c; ib->ptr[ib->length_dw++] = 0x00000000; + if ((ring->adev->vce.fw_version >> 24) >= 52) { + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + } ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 159ce54bbd8d..b53d273eb7a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -885,17 +885,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; int r; + spin_lock(&vm->freed_lock); while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - + spin_unlock(&vm->freed_lock); r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL); kfree(mapping); if (r) return r; + spin_lock(&vm->freed_lock); } + spin_unlock(&vm->freed_lock); + return 0; } @@ -922,8 +926,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - + mutex_lock(&bo_va->mutex); r = amdgpu_vm_bo_update(adev, bo_va, NULL); + mutex_unlock(&bo_va->mutex); if (r) return r; @@ -967,7 +972,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->vm_status); - + mutex_init(&bo_va->mutex); list_add_tail(&bo_va->bo_list, &bo->va); return bo_va; @@ -1045,7 +1050,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mapping->offset = offset; mapping->flags = flags; + mutex_lock(&bo_va->mutex); list_add(&mapping->list, &bo_va->invalids); + mutex_unlock(&bo_va->mutex); spin_lock(&vm->it_lock); interval_tree_insert(&mapping->it, &vm->va); spin_unlock(&vm->it_lock); @@ -1076,6 +1083,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (r) goto error_free; + /* Keep a reference to the page table to avoid freeing + * them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(vm->page_directory); + r = amdgpu_vm_clear_bo(adev, pt); if (r) { amdgpu_bo_unref(&pt); @@ -1121,7 +1133,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; - + mutex_lock(&bo_va->mutex); list_for_each_entry(mapping, &bo_va->valids, list) { if (mapping->it.start == saddr) break; @@ -1135,20 +1147,25 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, break; } - if (&mapping->list == &bo_va->invalids) + if (&mapping->list == &bo_va->invalids) { + mutex_unlock(&bo_va->mutex); return -ENOENT; + } } - + mutex_unlock(&bo_va->mutex); list_del(&mapping->list); spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); - if (valid) + if (valid) { + spin_lock(&vm->freed_lock); list_add(&mapping->list, &vm->freed); - else + spin_unlock(&vm->freed_lock); + } else { kfree(mapping); + } return 0; } @@ -1181,7 +1198,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, interval_tree_remove(&mapping->it, &vm->va); spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); + spin_lock(&vm->freed_lock); list_add(&mapping->list, &vm->freed); + spin_unlock(&vm->freed_lock); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); @@ -1190,8 +1209,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, spin_unlock(&vm->it_lock); kfree(mapping); } - fence_put(bo_va->last_pt_update); + mutex_destroy(&bo_va->mutex); kfree(bo_va); } @@ -1236,13 +1255,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->ids[i].id = 0; vm->ids[i].flushed_updates = NULL; } - mutex_init(&vm->mutex); vm->va = RB_ROOT; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); spin_lock_init(&vm->it_lock); + spin_lock_init(&vm->freed_lock); pd_size = amdgpu_vm_directory_size(adev); pd_entries = amdgpu_vm_num_pdes(adev); @@ -1320,7 +1339,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) fence_put(vm->ids[i].flushed_updates); } - mutex_destroy(&vm->mutex); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index cb0f7747e3dc..4dcc8fba5792 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1250,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1333,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, (adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1357,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 5af3721851d6..8f1e51128b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1238,7 +1238,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1321,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, (adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1345,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 4f7b49a6dc50..42d954dc436d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1193,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1276,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, (adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1302,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 7427d8cd4c43..ed8abb58a785 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -513,7 +513,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL3, tmp); /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index cb0e50ebb528..d39028440814 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -657,7 +657,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL4, tmp); /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6a52db6ad8d7..370c6c9d81c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -40,6 +40,9 @@ #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 #define VCE_V3_0_FW_SIZE (384 * 1024) #define VCE_V3_0_STACK_SIZE (64 * 1024) @@ -130,9 +133,11 @@ static int vce_v3_0_start(struct amdgpu_device *adev) /* set BUSY flag */ WREG32_P(mmVCE_STATUS, 1, ~1); - - WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, - ~VCE_VCPU_CNTL__CLK_EN_MASK); + if (adev->asic_type >= CHIP_STONEY) + WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001); + else + WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, + ~VCE_VCPU_CNTL__CLK_EN_MASK); WREG32_P(mmVCE_SOFT_RESET, VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, @@ -391,8 +396,12 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) WREG32(mmVCE_LMI_SWAP_CNTL, 0); WREG32(mmVCE_LMI_SWAP_CNTL1, 0); WREG32(mmVCE_LMI_VM_CTRL, 0); - - WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); + if (adev->asic_type >= CHIP_STONEY) { + WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8)); + WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8)); + WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8)); + } else + WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V3_0_FW_SIZE; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); @@ -576,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: VCE\n"); + + WREG32_P(mmVCE_SYS_INT_STATUS, + VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK, + ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK); + switch (entry->src_data) { case 0: amdgpu_fence_process(&adev->vce.ring[0]); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ea30d6ad4c13..3a4820e863ec 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -30,8 +30,7 @@ #define CREATE_TRACE_POINTS #include "gpu_sched_trace.h" -static struct amd_sched_job * -amd_sched_entity_pop_job(struct amd_sched_entity *entity); +static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); struct kmem_cache *sched_fence_slab; @@ -64,36 +63,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, } /** - * Select next job from a specified run queue with round robin policy. - * Return NULL if nothing available. + * Select an entity which could provide a job to run + * + * @rq The run queue to check. + * + * Try to find a ready entity, returns NULL if none found. */ -static struct amd_sched_job * -amd_sched_rq_select_job(struct amd_sched_rq *rq) +static struct amd_sched_entity * +amd_sched_rq_select_entity(struct amd_sched_rq *rq) { struct amd_sched_entity *entity; - struct amd_sched_job *sched_job; spin_lock(&rq->lock); entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { - sched_job = amd_sched_entity_pop_job(entity); - if (sched_job) { + if (amd_sched_entity_is_ready(entity)) { rq->current_entity = entity; spin_unlock(&rq->lock); - return sched_job; + return entity; } } } list_for_each_entry(entity, &rq->entities, list) { - sched_job = amd_sched_entity_pop_job(entity); - if (sched_job) { + if (amd_sched_entity_is_ready(entity)) { rq->current_entity = entity; spin_unlock(&rq->lock); - return sched_job; + return entity; } if (entity == rq->current_entity) @@ -177,6 +176,24 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) } /** + * Check if entity is ready + * + * @entity The pointer to a valid scheduler entity + * + * Return true if entity could provide a job. + */ +static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) +{ + if (kfifo_is_empty(&entity->job_queue)) + return false; + + if (ACCESS_ONCE(entity->dependency)) + return false; + + return true; +} + +/** * Destroy a context entity * * @sched Pointer to scheduler instance @@ -211,32 +228,53 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) amd_sched_wakeup(entity->sched); } +static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) +{ + struct amd_gpu_scheduler *sched = entity->sched; + struct fence * fence = entity->dependency; + struct amd_sched_fence *s_fence; + + if (fence->context == entity->fence_context) { + /* We can ignore fences from ourself */ + fence_put(entity->dependency); + return false; + } + + s_fence = to_amd_sched_fence(fence); + if (s_fence && s_fence->sched == sched) { + /* Fence is from the same scheduler */ + if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) { + /* Ignore it when it is already scheduled */ + fence_put(entity->dependency); + return false; + } + + /* Wait for fence to be scheduled */ + entity->cb.func = amd_sched_entity_wakeup; + list_add_tail(&entity->cb.node, &s_fence->scheduled_cb); + return true; + } + + if (!fence_add_callback(entity->dependency, &entity->cb, + amd_sched_entity_wakeup)) + return true; + + fence_put(entity->dependency); + return false; +} + static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->sched; struct amd_sched_job *sched_job; - if (ACCESS_ONCE(entity->dependency)) - return NULL; - if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job))) return NULL; - while ((entity->dependency = sched->ops->dependency(sched_job))) { - - if (entity->dependency->context == entity->fence_context) { - /* We can ignore fences from ourself */ - fence_put(entity->dependency); - continue; - } - - if (fence_add_callback(entity->dependency, &entity->cb, - amd_sched_entity_wakeup)) - fence_put(entity->dependency); - else + while ((entity->dependency = sched->ops->dependency(sched_job))) + if (amd_sched_entity_add_dependency_cb(entity)) return NULL; - } return sched_job; } @@ -250,6 +288,7 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity) */ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) { + struct amd_gpu_scheduler *sched = sched_job->sched; struct amd_sched_entity *entity = sched_job->s_entity; bool added, first = false; @@ -264,7 +303,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) /* first job wakes up scheduler */ if (first) - amd_sched_wakeup(sched_job->sched); + amd_sched_wakeup(sched); return added; } @@ -280,9 +319,9 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job) { struct amd_sched_entity *entity = sched_job->s_entity; + trace_amd_sched_job(sched_job); wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); - trace_amd_sched_job(sched_job); } /** @@ -304,22 +343,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) } /** - * Select next to run + * Select next entity to process */ -static struct amd_sched_job * -amd_sched_select_job(struct amd_gpu_scheduler *sched) +static struct amd_sched_entity * +amd_sched_select_entity(struct amd_gpu_scheduler *sched) { - struct amd_sched_job *sched_job; + struct amd_sched_entity *entity; if (!amd_sched_ready(sched)) return NULL; /* Kernel run queue has higher priority than normal run queue*/ - sched_job = amd_sched_rq_select_job(&sched->kernel_rq); - if (sched_job == NULL) - sched_job = amd_sched_rq_select_job(&sched->sched_rq); + entity = amd_sched_rq_select_entity(&sched->kernel_rq); + if (entity == NULL) + entity = amd_sched_rq_select_entity(&sched->sched_rq); - return sched_job; + return entity; } static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) @@ -381,13 +420,16 @@ static int amd_sched_main(void *param) unsigned long flags; wait_event_interruptible(sched->wake_up_worker, - kthread_should_stop() || - (sched_job = amd_sched_select_job(sched))); + (entity = amd_sched_select_entity(sched)) || + kthread_should_stop()); + if (!entity) + continue; + + sched_job = amd_sched_entity_pop_job(entity); if (!sched_job) continue; - entity = sched_job->s_entity; s_fence = sched_job->s_fence; if (sched->timeout != MAX_SCHEDULE_TIMEOUT) { @@ -400,6 +442,7 @@ static int amd_sched_main(void *param) atomic_inc(&sched->hw_rq_count); fence = sched->ops->run_job(sched_job); + amd_sched_fence_scheduled(s_fence); if (fence) { r = fence_add_callback(fence, &s_fence->cb, amd_sched_process_job); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 939692b14f4b..a0f0ae53aacd 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -27,6 +27,8 @@ #include <linux/kfifo.h> #include <linux/fence.h> +#define AMD_SCHED_FENCE_SCHEDULED_BIT FENCE_FLAG_USER_BITS + struct amd_gpu_scheduler; struct amd_sched_rq; @@ -68,6 +70,7 @@ struct amd_sched_rq { struct amd_sched_fence { struct fence base; struct fence_cb cb; + struct list_head scheduled_cb; struct amd_gpu_scheduler *sched; spinlock_t lock; void *owner; @@ -134,7 +137,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job); struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity, void *owner); +void amd_sched_fence_scheduled(struct amd_sched_fence *fence); void amd_sched_fence_signal(struct amd_sched_fence *fence); - #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 8d2130b9ff05..87c78eecea64 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -35,6 +35,8 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL); if (fence == NULL) return NULL; + + INIT_LIST_HEAD(&fence->scheduled_cb); fence->owner = owner; fence->sched = s_entity->sched; spin_lock_init(&fence->lock); @@ -55,6 +57,17 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence) FENCE_TRACE(&fence->base, "was already signaled\n"); } +void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence) +{ + struct fence_cb *cur, *tmp; + + set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags); + list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) { + list_del_init(&cur->node); + cur->func(&s_fence->base, cur); + } +} + static const char *amd_sched_fence_get_driver_name(struct fence *fence) { return "amd_sched"; |