summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com>, 2017-06-20 11:19:08 +1000
committer: Dave Airlie <airlied@redhat.com>, 2017-06-20 11:19:08 +1000
commit: 8c52f36413063bedbb3d31a65048a61ea2f1e169 (patch)
tree: c4eca52eba130c259b977832ff8dfc718a78aba8
parent: 3aaf4d95b07333af27c050511898d74a299fc743 (diff)
parent: 5ac55629d6b3fcde69f46aa772c6e83be0bdcbbf (diff)
download:
linux-8c52f36413063bedbb3d31a65048a61ea2f1e169.tar.gz
linux-8c52f36413063bedbb3d31a65048a61ea2f1e169.tar.bz2
linux-8c52f36413063bedbb3d31a65048a61ea2f1e169.zip
Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
A few more things for 4.13:
- Semaphore support using sync objects
- Drop fb location programming
- Optimize bo list ioctl

* 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux:
  drm/amdgpu: Optimize mutex usage (v4)
  drm/amdgpu: Optimization of AMDGPU_BO_LIST_OP_CREATE (v2)
  amdgpu: use drm sync objects for shared semaphores (v6)
  amdgpu/cs: split out fence dependency checking (v2)
  drm/amdgpu: don't check the default value for vm size
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 89
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 181
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2
-rw-r--r-- include/uapi/drm/amdgpu_drm.h | 6
6 files changed, 210 insertions, 77 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e0adad590ecb..12d61edb3597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -869,6 +869,8 @@ struct amdgpu_fpriv {
struct amdgpu_bo_list {
struct mutex lock;
+ struct rcu_head rhead;
+ struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
@@ -1159,6 +1161,9 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
+
+ unsigned num_post_dep_syncobjs;
+ struct drm_syncobj **post_dep_syncobjs;
};
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 9f0247cdda5e..f621ee115c98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -35,33 +35,59 @@
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
-static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
- struct amdgpu_bo_list **result,
+static int amdgpu_bo_list_set(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct amdgpu_bo_list *list,
+ struct drm_amdgpu_bo_list_entry *info,
+ unsigned num_entries);
+
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+ unsigned i;
+ struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+ refcount);
+
+ for (i = 0; i < list->num_entries; ++i)
+ amdgpu_bo_unref(&list->array[i].robj);
+
+ mutex_destroy(&list->lock);
+ kvfree(list->array);
+ kfree_rcu(list, rhead);
+}
+
+static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ unsigned num_entries,
int *id)
{
int r;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_bo_list *list;
- *result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
- if (!*result)
+ list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
+ if (!list)
return -ENOMEM;
+ /* initialize bo list*/
+ mutex_init(&list->lock);
+ kref_init(&list->refcount);
+ r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
+ if (r) {
+ kfree(list);
+ return r;
+ }
+
+ /* idr alloc should be called only after initialization of bo list. */
mutex_lock(&fpriv->bo_list_lock);
- r = idr_alloc(&fpriv->bo_list_handles, *result,
- 1, 0, GFP_KERNEL);
+ r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+ mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
- mutex_unlock(&fpriv->bo_list_lock);
- kfree(*result);
+ kfree(list);
return r;
}
*id = r;
- mutex_init(&(*result)->lock);
- (*result)->num_entries = 0;
- (*result)->array = NULL;
-
- mutex_lock(&(*result)->lock);
- mutex_unlock(&fpriv->bo_list_lock);
-
return 0;
}
@@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
mutex_lock(&fpriv->bo_list_lock);
list = idr_remove(&fpriv->bo_list_handles, id);
- if (list) {
- /* Another user may have a reference to this list still */
- mutex_lock(&list->lock);
- mutex_unlock(&list->lock);
- amdgpu_bo_list_free(list);
- }
mutex_unlock(&fpriv->bo_list_lock);
+ if (list)
+ kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
}
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *result;
- mutex_lock(&fpriv->bo_list_lock);
+ rcu_read_lock();
result = idr_find(&fpriv->bo_list_handles, id);
- if (result)
- mutex_lock(&result->lock);
- mutex_unlock(&fpriv->bo_list_lock);
+
+ if (result) {
+ if (kref_get_unless_zero(&result->refcount))
+ mutex_lock(&result->lock);
+ else
+ result = NULL;
+ }
+ rcu_read_unlock();
+
return result;
}
@@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
+ kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
}
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
- r = amdgpu_bo_list_create(fpriv, &list, &handle);
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &handle);
if (r)
goto error_free;
-
- r = amdgpu_bo_list_set(adev, filp, list, info,
- args->in.bo_number);
- amdgpu_bo_list_put(list);
- if (r)
- goto error_free;
-
break;
case AMDGPU_BO_LIST_OP_DESTROY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a37bdf4f8e9b..aeee6840e82b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break;
default:
@@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
+
+ for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+ drm_syncobj_put(parser->post_dep_syncobjs[i]);
+ kfree(parser->post_dep_syncobjs);
+
dma_fence_put(parser->fence);
if (parser->ctx)
@@ -923,65 +931,150 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- int i, j, r;
+ unsigned num_deps;
+ int i, r;
+ struct drm_amdgpu_cs_chunk_dep *deps;
- for (i = 0; i < p->nchunks; ++i) {
- struct drm_amdgpu_cs_chunk_dep *deps;
- struct amdgpu_cs_chunk *chunk;
- unsigned num_deps;
+ deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
- chunk = &p->chunks[i];
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ring *ring;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
- continue;
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
+ deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &ring);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_dep);
+ fence = amdgpu_ctx_get_fence(ctx, ring,
+ deps[i].handle);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
+ amdgpu_ctx_put(ctx);
+ return r;
+ } else if (fence) {
+ r = amdgpu_sync_fence(p->adev, &p->job->sync,
+ fence);
+ dma_fence_put(fence);
+ amdgpu_ctx_put(ctx);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
- for (j = 0; j < num_deps; ++j) {
- struct amdgpu_ring *ring;
- struct amdgpu_ctx *ctx;
- struct dma_fence *fence;
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+ uint32_t handle)
+{
+ int r;
+ struct dma_fence *fence;
+ r = drm_syncobj_fence_get(p->filp, handle, &fence);
+ if (r)
+ return r;
- ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
- if (ctx == NULL)
- return -EINVAL;
+ r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+ dma_fence_put(fence);
- r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
- deps[j].ip_type,
- deps[j].ip_instance,
- deps[j].ring, &ring);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
+ return r;
+}
- fence = amdgpu_ctx_get_fence(ctx, ring,
- deps[j].handle);
- if (IS_ERR(fence)) {
- r = PTR_ERR(fence);
- amdgpu_ctx_put(ctx);
- return r;
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ unsigned num_deps;
+ int i, r;
+ struct drm_amdgpu_cs_chunk_sem *deps;
- } else if (fence) {
- r = amdgpu_sync_fence(adev, &p->job->sync,
- fence);
- dma_fence_put(fence);
- amdgpu_ctx_put(ctx);
- if (r)
- return r;
- }
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ unsigned num_deps;
+ int i;
+ struct drm_amdgpu_cs_chunk_sem *deps;
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ p->post_dep_syncobjs = kmalloc_array(num_deps,
+ sizeof(struct drm_syncobj *),
+ GFP_KERNEL);
+ p->num_post_dep_syncobjs = 0;
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_dep_syncobjs[i])
+ return -EINVAL;
+ p->num_post_dep_syncobjs++;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ struct amdgpu_cs_parser *p)
+{
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+ r = amdgpu_cs_process_fence_dep(p, chunk);
+ if (r)
+ return r;
+ } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+ r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+ if (r)
+ return r;
+ } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+ r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+ if (r)
+ return r;
}
}
return 0;
}
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+ int i;
+
+ for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
+ drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
+ p->fence);
+ }
+}
+
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
@@ -1002,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp;
job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+ amdgpu_cs_post_dependencies(p);
+
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job);
@@ -1009,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 875cde414be7..b2c960b2ea82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1073,6 +1073,10 @@ def_value:
static void amdgpu_check_vm_size(struct amdgpu_device *adev)
{
+ /* no need to check the default value */
+ if (amdgpu_vm_size == -1)
+ return;
+
if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
amdgpu_vm_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8168f8ec711a..4c7c2628ace4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -782,7 +782,7 @@ static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
+ DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 34128f698f5e..d9aa4a339650 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_IB 0x01
#define AMDGPU_CHUNK_ID_FENCE 0x02
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
struct drm_amdgpu_cs_chunk {
__u32 chunk_id;
@@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence {
__u32 offset;
};
+struct drm_amdgpu_cs_chunk_sem {
+ __u32 handle;
+};
+
struct drm_amdgpu_cs_chunk_data {
union {
struct drm_amdgpu_cs_chunk_ib ib_data;