diff options
Diffstat (limited to 'drivers/gpu/drm')
57 files changed, 825 insertions, 450 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 66e40398b3d3..e620807418ea 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ +obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ @@ -67,4 +68,3 @@ obj-$(CONFIG_DRM_IMX) += imx/ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ -obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4f7b275f2f7b..fcfdf23e1913 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -31,7 +31,6 @@ #include <uapi/linux/kfd_ioctl.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/uaccess.h> #include <uapi/asm-generic/mman-common.h> #include <asm/processor.h> #include "kfd_priv.h" @@ -121,27 +120,20 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); - process->is_32bit_user_mode = is_32bit_user_mode; - dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", process->pasid, process->is_32bit_user_mode); - kfd_init_apertures(process); - return 0; } -static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_get_version_args args; + struct kfd_ioctl_get_version_args *args = data; int err = 0; - args.major_version = KFD_IOCTL_MAJOR_VERSION; - args.minor_version = KFD_IOCTL_MINOR_VERSION; - - if (copy_to_user(arg, &args, sizeof(args))) - err = -EFAULT; + args->major_version = KFD_IOCTL_MAJOR_VERSION; + args->minor_version = KFD_IOCTL_MINOR_VERSION; return err; } @@ -225,10 +217,10 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return 0; } -static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_create_queue_args args; + struct kfd_ioctl_create_queue_args *args = data; struct kfd_dev *dev; int err = 0; unsigned int queue_id; @@ -237,16 +229,13 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - pr_debug("kfd: creating queue ioctl\n"); - err = set_queue_properties_from_user(&q_properties, &args); + err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -254,7 +243,7 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto err_bind_process; } @@ -267,33 +256,26 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err != 0) goto err_create_queue; - args.queue_id = queue_id; + args->queue_id = queue_id; /* Return gpu_id as doorbell offset for mmap usage */ - args.doorbell_offset = args.gpu_id << PAGE_SHIFT; - - if (copy_to_user(arg, &args, sizeof(args))) { - err = -EFAULT; - goto err_copy_args_out; - } + args->doorbell_offset = args->gpu_id << PAGE_SHIFT; mutex_unlock(&p->mutex); - pr_debug("kfd: queue id %d was created successfully\n", args.queue_id); + pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); pr_debug("ring buffer address == 0x%016llX\n", - args.ring_base_address); + args->ring_base_address); pr_debug("read ptr address == 0x%016llX\n", - args.read_pointer_address); + args->read_pointer_address); pr_debug("write ptr address == 0x%016llX\n", - args.write_pointer_address); + args->write_pointer_address); return 0; -err_copy_args_out: - pqm_destroy_queue(&p->pqm, queue_id); err_create_queue: err_bind_process: mutex_unlock(&p->mutex); @@ -301,99 +283,90 @@ err_bind_process: } static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_destroy_queue_args args; - - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; + struct kfd_ioctl_destroy_queue_args *args = data; pr_debug("kfd: destroying queue id %d for PASID %d\n", - args.queue_id, + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_destroy_queue(&p->pqm, args.queue_id); + retval = pqm_destroy_queue(&p->pqm, args->queue_id); mutex_unlock(&p->mutex); return retval; } static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_update_queue_args args; + struct kfd_ioctl_update_queue_args *args = data; struct queue_properties properties; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } - if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) { + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } - if ((args.ring_base_address) && + if ((args->ring_base_address) && (!access_ok(VERIFY_WRITE, - (const void __user *) args.ring_base_address, + (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("kfd: can't access ring base address\n"); return -EFAULT; } - if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) { + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } - properties.queue_address = args.ring_base_address; - properties.queue_size = args.ring_size; - properties.queue_percent = args.queue_percentage; - properties.priority = args.queue_priority; + properties.queue_address = args->ring_base_address; + properties.queue_size = args->ring_size; + properties.queue_percent = args->queue_percentage; + properties.priority = args->queue_priority; pr_debug("kfd: updating queue id %d for PASID %d\n", - args.queue_id, p->pasid); + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_update_queue(&p->pqm, args.queue_id, &properties); + retval = pqm_update_queue(&p->pqm, args->queue_id, &properties); mutex_unlock(&p->mutex); return retval; } -static long kfd_ioctl_set_memory_policy(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_set_memory_policy(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_set_memory_policy_args args; + struct kfd_ioctl_set_memory_policy_args *args = data; struct kfd_dev *dev; int err = 0; struct kfd_process_device *pdd; enum cache_policy default_policy, alternate_policy; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -401,23 +374,23 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto out; } - default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT) + default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; alternate_policy = - (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) + (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; if (!dev->dqm->set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, - (void __user *)args.alternate_aperture_base, - args.alternate_aperture_size)) + (void __user *)args->alternate_aperture_base, + args->alternate_aperture_size)) err = -EINVAL; out: @@ -426,53 +399,44 @@ out: return err; } -static long kfd_ioctl_get_clock_counters(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_get_clock_counters(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_clock_counters_args args; + struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; struct timespec time; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; /* Reading GPU clock counter from KGD */ - args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic(&time); - args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time); + args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time); get_monotonic_boottime(&time); - args.system_clock_counter = (uint64_t)timespec_to_ns(&time); + args->system_clock_counter = (uint64_t)timespec_to_ns(&time); /* Since the counter is in nano-seconds we use 1GHz frequency */ - args.system_clock_freq = 1000000000; - - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; + args->system_clock_freq = 1000000000; return 0; } static int kfd_ioctl_get_process_apertures(struct file *filp, - struct kfd_process *p, void __user *arg) + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_process_apertures_args args; + struct kfd_ioctl_get_process_apertures_args *args = data; struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - args.num_of_nodes = 0; + args->num_of_nodes = 0; mutex_lock(&p->mutex); @@ -481,7 +445,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, /* Run over all pdd of the process */ pdd = kfd_get_first_process_device_data(p); do { - pAperture = &args.process_apertures[args.num_of_nodes]; + pAperture = + &args->process_apertures[args->num_of_nodes]; pAperture->gpu_id = pdd->dev->id; pAperture->lds_base = pdd->lds_base; pAperture->lds_limit = pdd->lds_limit; @@ -491,7 +456,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, pAperture->scratch_limit = pdd->scratch_limit; dev_dbg(kfd_device, - "node id %u\n", args.num_of_nodes); + "node id %u\n", args->num_of_nodes); dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id); dev_dbg(kfd_device, @@ -507,80 +472,131 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit); - args.num_of_nodes++; + args->num_of_nodes++; } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && - (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; - return 0; } +#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + +/** Ioctl table */ +static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, + kfd_ioctl_get_version, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, + kfd_ioctl_create_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, + kfd_ioctl_destroy_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, + kfd_ioctl_set_memory_policy, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, + kfd_ioctl_get_clock_counters, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, + kfd_ioctl_get_process_apertures, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, + kfd_ioctl_update_queue, 0), +}; + +#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) + static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; - long err = -EINVAL; + amdkfd_ioctl_t *func; + const struct amdkfd_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + char stack_kdata[128]; + char *kdata = NULL; + unsigned int usize, asize; + int retcode = -EINVAL; + + if (nr >= AMDKFD_CORE_IOCTL_COUNT) + goto err_i1; + + if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + u32 amdkfd_size; + + ioctl = &amdkfd_ioctls[nr]; - dev_dbg(kfd_device, - "ioctl cmd 0x%x (#%d), arg 0x%lx\n", - cmd, _IOC_NR(cmd), arg); + amdkfd_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (amdkfd_size > asize) + asize = amdkfd_size; + + cmd = ioctl->cmd; + } else + goto err_i1; + + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); - if (IS_ERR(process)) - return PTR_ERR(process); + if (IS_ERR(process)) { + dev_dbg(kfd_device, "no process\n"); + goto err_i1; + } - switch (cmd) { - case KFD_IOC_GET_VERSION: - err = kfd_ioctl_get_version(filep, process, (void __user *)arg); - break; - case KFD_IOC_CREATE_QUEUE: - err = kfd_ioctl_create_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_DESTROY_QUEUE: - err = kfd_ioctl_destroy_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_SET_MEMORY_POLICY: - err = kfd_ioctl_set_memory_policy(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_CLOCK_COUNTERS: - err = kfd_ioctl_get_clock_counters(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_PROCESS_APERTURES: - err = kfd_ioctl_get_process_apertures(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_UPDATE_QUEUE: - err = kfd_ioctl_update_queue(filep, process, - (void __user *)arg); - break; - - default: - dev_err(kfd_device, - "unknown ioctl cmd 0x%x, arg 0x%lx)\n", - cmd, arg); - err = -EINVAL; - break; + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(kfd_device, "no function\n"); + retcode = -EINVAL; + goto err_i1; } - if (err < 0) - dev_err(kfd_device, - "ioctl error %ld for ioctl cmd 0x%x (#%d)\n", - err, cmd, _IOC_NR(cmd)); + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kmalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto err_i1; + } + } + if (asize > usize) + memset(kdata + usize, 0, asize - usize); + } - return err; + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { + retcode = -EFAULT; + goto err_i1; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + retcode = func(filep, process, kdata); + + if (cmd & IOC_OUT) + if (copy_to_user((void __user *)arg, kdata, usize) != 0) + retcode = -EFAULT; + +err_i1: + if (!ioctl) + dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + + if (kdata != stack_kdata) + kfree(kdata); + + if (retcode) + dev_dbg(kfd_device, "ret = %d\n", retcode); + + return retcode; } static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 924e90c072e5..9c8961d22360 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -161,6 +161,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm, { int bit = qpd->vmid - KFD_VMID_START_OFFSET; + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); qpd->vmid = 0; q->properties.vmid = 0; @@ -272,6 +275,18 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, return retval; } + pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", + q->pipe, + q->queue); + + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, q->properties.write_ptr); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } + return 0; } @@ -320,6 +335,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + bool prev_active = false; BUG_ON(!dqm || !q || !q->mqd); @@ -330,10 +346,18 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) return -ENOMEM; } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if (q->properties.is_active == true) + prev_active = true; + + /* + * + * check active state vs. the previous state + * and modify counter accordingly + */ + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if ((q->properties.is_active == true) && (prev_active == false)) dqm->queue_count++; - else + else if ((q->properties.is_active == false) && (prev_active == true)) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66df4da01c29..e64aa99e5e41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -299,13 +299,13 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_dev *dev; struct kfd_process_device *pdd; - mutex_lock(&process->mutex); - /*Iterating over all devices*/ while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) { pdd = kfd_get_process_device_data(dev, process, 1); + if (!pdd) + return -1; /* * For 64 bit process aperture will be statically reserved in @@ -348,8 +348,6 @@ int kfd_init_apertures(struct kfd_process *process) id++; } - mutex_unlock(&process->mutex); - return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index adc31474e786..4c3828cf45bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -184,7 +184,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, pipe_id, queue_id); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 71699ad97d74..4c25ef504f79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,7 @@ int kfd_pasid_init(void) { pasid_limit = max_num_of_processes; - pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f9fb81e3bb09..a5edb29507e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -463,6 +463,24 @@ struct kfd_process { bool is_32bit_user_mode; }; +/** + * Ioctl function type. + * + * \param filep pointer to file structure. + * \param p amdkfd process pointer. + * \param data pointer to arg that was copied from user. + */ +typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, + void *data); + +struct amdkfd_ioctl_desc { + unsigned int cmd; + int flags; + amdkfd_ioctl_t *func; + unsigned int cmd_drv; + const char *name; +}; + void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(const struct task_struct *); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b85eb0b830b4..3c76ef05cbcf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -26,6 +26,8 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> +#include <linux/compat.h> + struct mm_struct; #include "kfd_priv.h" @@ -285,8 +287,15 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_process_pqm_init; + /* init process apertures*/ + process->is_32bit_user_mode = is_compat_task(); + if (kfd_init_apertures(process) != 0) + goto err_init_apretures; + return process; +err_init_apretures: + pqm_uninit(&process->pqm); err_process_pqm_init: hash_del_rcu(&process->kfd_processes); synchronize_rcu(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5733e2859e8a..cca1708fd811 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -700,8 +700,6 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_per_cu); sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", dev->node_props.max_slots_scratch_cu); - sysfs_show_32bit_prop(buffer, "engine_id", - dev->node_props.engine_id); sysfs_show_32bit_prop(buffer, "vendor_id", dev->node_props.vendor_id); sysfs_show_32bit_prop(buffer, "device_id", @@ -715,6 +713,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->gpu->kgd)); sysfs_show_64bit_prop(buffer, "local_mem_size", kfd2kgd->get_vmem_size(dev->gpu->kgd)); + + sysfs_show_32bit_prop(buffer, "fw_version", + kfd2kgd->get_fw_version( + dev->gpu->kgd, + KGD_ENGINE_MEC1)); + } ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", @@ -917,7 +921,7 @@ static int kfd_build_sysfs_node_tree(void) uint32_t i = 0; list_for_each_entry(dev, &topology_device_list, list) { - ret = kfd_build_sysfs_node_entry(dev, 0); + ret = kfd_build_sysfs_node_entry(dev, i); if (ret < 0) return ret; i++; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 9c729dd8dd50..96a512208fad 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -45,6 +45,17 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +enum kgd_engine_type { + KGD_ENGINE_PFP = 1, + KGD_ENGINE_ME, + KGD_ENGINE_CE, + KGD_ENGINE_MEC1, + KGD_ENGINE_MEC2, + KGD_ENGINE_RLC, + KGD_ENGINE_SDMA, + KGD_ENGINE_MAX +}; + struct kgd2kfd_shared_resources { /* Bit n == 1 means VMID n is available for KFD. */ unsigned int compute_vmid_bitmap; @@ -137,6 +148,8 @@ struct kgd2kfd_calls { * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. * + * @get_fw_version: Returns FW versions from the header + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -170,12 +183,14 @@ struct kfd2kgd_calls { int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); - bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, + bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, + enum kgd_engine_type type); }; bool kgd2kfd_init(unsigned interface_version, diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 4a78a773151c..bbdbe4721573 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -61,7 +61,7 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state, struct drm_crtc_state *crtc_state; if (plane->state->crtc) { - crtc_state = state->crtc_states[drm_crtc_index(plane->crtc)]; + crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)]; if (WARN_ON(!crtc_state)) return; diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index f5a5f18efa5b..4d79dad9d44f 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -830,6 +830,8 @@ drm_get_last_vbltimestamp(struct drm_device *dev, int crtc, * vblank events since the system was booted, including lost events due to * modesetting activity. * + * This is the legacy version of drm_crtc_vblank_count(). + * * Returns: * The software vblank counter. */ @@ -844,6 +846,25 @@ u32 drm_vblank_count(struct drm_device *dev, int crtc) EXPORT_SYMBOL(drm_vblank_count); /** + * drm_crtc_vblank_count - retrieve "cooked" vblank counter value + * @crtc: which counter to retrieve + * + * Fetches the "cooked" vblank count value that represents the number of + * vblank events since the system was booted, including lost events due to + * modesetting activity. + * + * This is the native KMS version of drm_vblank_count(). + * + * Returns: + * The software vblank counter. + */ +u32 drm_crtc_vblank_count(struct drm_crtc *crtc) +{ + return drm_vblank_count(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_count); + +/** * drm_vblank_count_and_time - retrieve "cooked" vblank counter value * and the system timestamp corresponding to that vblank counter value. * @@ -904,6 +925,8 @@ static void send_vblank_event(struct drm_device *dev, * * Updates sequence # and timestamp on event, and sends it to userspace. * Caller must hold event lock. + * + * This is the legacy version of drm_crtc_send_vblank_event(). */ void drm_send_vblank_event(struct drm_device *dev, int crtc, struct drm_pending_vblank_event *e) @@ -923,6 +946,23 @@ void drm_send_vblank_event(struct drm_device *dev, int crtc, EXPORT_SYMBOL(drm_send_vblank_event); /** + * drm_crtc_send_vblank_event - helper to send vblank event after pageflip + * @crtc: the source CRTC of the vblank event + * @e: the event to send + * + * Updates sequence # and timestamp on event, and sends it to userspace. + * Caller must hold event lock. + * + * This is the native KMS version of drm_send_vblank_event(). + */ +void drm_crtc_send_vblank_event(struct drm_crtc *crtc, + struct drm_pending_vblank_event *e) +{ + drm_send_vblank_event(crtc->dev, drm_crtc_index(crtc), e); +} +EXPORT_SYMBOL(drm_crtc_send_vblank_event); + +/** * drm_vblank_enable - enable the vblank interrupt on a CRTC * @dev: DRM device * @crtc: CRTC in question @@ -1594,6 +1634,8 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) * * Drivers should call this routine in their vblank interrupt handlers to * update the vblank counter and send any signals that may be pending. + * + * This is the legacy version of drm_crtc_handle_vblank(). */ bool drm_handle_vblank(struct drm_device *dev, int crtc) { @@ -1670,3 +1712,21 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) return true; } EXPORT_SYMBOL(drm_handle_vblank); + +/** + * drm_crtc_handle_vblank - handle a vblank event + * @crtc: where this event occurred + * + * Drivers should call this routine in their vblank interrupt handlers to + * update the vblank counter and send any signals that may be pending. + * + * This is the native KMS version of drm_handle_vblank(). + * + * Returns: + * True if the event was successfully handled, false on failure. + */ +bool drm_crtc_handle_vblank(struct drm_crtc *crtc) +{ + return drm_handle_vblank(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_handle_vblank); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f990ab4c3efb..574057cd1d09 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -811,6 +811,8 @@ int i915_reset(struct drm_device *dev) if (!i915.reset) return 0; + intel_reset_gt_powersave(dev); + mutex_lock(&dev->struct_mutex); i915_gem_reset(dev); @@ -880,7 +882,7 @@ int i915_reset(struct drm_device *dev) * of re-init after reset. */ if (INTEL_INFO(dev)->gen > 5) - intel_reset_gt_powersave(dev); + intel_enable_gt_powersave(dev); } else { mutex_unlock(&dev->struct_mutex); } @@ -1584,7 +1586,7 @@ static struct drm_driver driver = { .gem_prime_import = i915_gem_prime_import, .dumb_create = i915_gem_dumb_create, - .dumb_map_offset = i915_gem_dumb_map_offset, + .dumb_map_offset = i915_gem_mmap_gtt, .dumb_destroy = drm_gem_dumb_destroy, .ioctls = i915_ioctls, .fops = &i915_driver_fops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63bcda5541ec..e9f891c432f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1756,8 +1756,6 @@ struct drm_i915_private { */ struct workqueue_struct *dp_wq; - uint32_t bios_vgacntr; - /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, @@ -2501,9 +2499,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int i915_gem_dumb_map_offset(struct drm_file *file_priv, - struct drm_device *dev, uint32_t handle, - uint64_t *offset); +int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, + uint32_t handle, uint64_t *offset); /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4a9faea626db..c11603b4cf1d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -401,7 +401,6 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, - bool dumb, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -417,7 +416,6 @@ i915_gem_create(struct drm_file *file, if (obj == NULL) return -ENOMEM; - obj->base.dumb = dumb; ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(&obj->base); @@ -437,7 +435,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, true, &args->handle); + args->size, &args->handle); } /** @@ -450,7 +448,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, false, &args->handle); + args->size, &args->handle); } static inline int @@ -1050,6 +1048,7 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; @@ -1069,9 +1068,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } + intel_runtime_pm_get(dev_priv); + ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + goto put_rpm; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { @@ -1123,6 +1124,9 @@ out: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); +put_rpm: + intel_runtime_pm_put(dev_priv); + return ret; } @@ -1840,10 +1844,10 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) drm_gem_free_mmap_offset(&obj->base); } -static int +int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, - uint32_t handle, bool dumb, + uint32_t handle, uint64_t *offset) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1860,13 +1864,6 @@ i915_gem_mmap_gtt(struct drm_file *file, goto unlock; } - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(dumb && !(obj->base.dumb || obj->base.import_attach), - "Illegal dumb map of accelerated buffer.\n"); - if (obj->base.size > dev_priv->gtt.mappable_end) { ret = -E2BIG; goto out; @@ -1891,15 +1888,6 @@ unlock: return ret; } -int -i915_gem_dumb_map_offset(struct drm_file *file, - struct drm_device *dev, - uint32_t handle, - uint64_t *offset) -{ - return i915_gem_mmap_gtt(file, dev, handle, true, offset); -} - /** * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device @@ -1921,7 +1909,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_mmap_gtt *args = data; - return i915_gem_mmap_gtt(file, dev, args->handle, false, &args->offset); + return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d17ff435f276..d011ec82ef1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -473,7 +473,12 @@ mi_set_context(struct intel_engine_cs *ring, u32 hw_flags) { u32 flags = hw_flags | MI_MM_SPACE_GTT; - int ret; + const int num_rings = + /* Use an extended w/a on ivb+ if signalling from other rings */ + i915_semaphore_is_enabled(ring->dev) ? + hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 : + 0; + int len, i, ret; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -490,15 +495,31 @@ mi_set_context(struct intel_engine_cs *ring, if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); - ret = intel_ring_begin(ring, 6); + + len = 4; + if (INTEL_INFO(ring->dev)->gen >= 7) + len += 2 + (num_rings ? 4*num_rings + 2 : 0); + + ret = intel_ring_begin(ring, len); if (ret) return ret; /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_INFO(ring->dev)->gen >= 7) + if (INTEL_INFO(ring->dev)->gen >= 7) { intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); - else - intel_ring_emit(ring, MI_NOOP); + if (num_rings) { + struct intel_engine_cs *signaller; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); + for_each_ring(signaller, to_i915(ring->dev), i) { + if (signaller == ring) + continue; + + intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base)); + intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + } + } + } intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); @@ -510,10 +531,21 @@ mi_set_context(struct intel_engine_cs *ring, */ intel_ring_emit(ring, MI_NOOP); - if (INTEL_INFO(ring->dev)->gen >= 7) + if (INTEL_INFO(ring->dev)->gen >= 7) { + if (num_rings) { + struct intel_engine_cs *signaller; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); + for_each_ring(signaller, to_i915(ring->dev), i) { + if (signaller == ring) + continue; + + intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base)); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + } + } intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); - else - intel_ring_emit(ring, MI_NOOP); + } intel_ring_advance(ring); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f06027ba3ee5..11738316394a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -121,9 +121,6 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - WARN_ONCE(obj->base.dumb, - "GPU use of dumb buffer is illegal.\n"); - drm_gem_object_reference(&obj->base); list_add_tail(&obj->obj_exec_link, &objects); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 981834b0f9b6..d0d3dfbe6d2a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -281,10 +281,14 @@ void gen6_enable_rps_interrupts(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; spin_lock_irq(&dev_priv->irq_lock); + WARN_ON(dev_priv->rps.pm_iir); WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); dev_priv->rps.interrupts_enabled = true; + I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) | + dev_priv->pm_rps_events); gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -3307,8 +3311,10 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs); if (INTEL_INFO(dev)->gen >= 6) { - pm_irqs |= dev_priv->pm_rps_events; - + /* + * RPS interrupts will get enabled/disabled on demand when RPS + * itself is enabled/disabled. + */ if (HAS_VEBOX(dev)) pm_irqs |= PM_VEBOX_USER_INTERRUPT; @@ -3520,7 +3526,11 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) dev_priv->pm_irq_mask = 0xffffffff; GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]); GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]); - GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, dev_priv->pm_rps_events); + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. + */ + GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, 0); GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]); } @@ -3609,7 +3619,7 @@ static void vlv_display_irq_uninstall(struct drm_i915_private *dev_priv) vlv_display_irq_reset(dev_priv); - dev_priv->irq_mask = 0; + dev_priv->irq_mask = ~0; } static void valleyview_irq_uninstall(struct drm_device *dev) @@ -3715,8 +3725,6 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3726,6 +3734,7 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if (I915_READ16(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; @@ -3897,8 +3906,6 @@ static bool i915_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3908,6 +3915,7 @@ static bool i915_handle_vblank(struct drm_device *dev, if (I915_READ(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eefdc238f70b..172de3b3433b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,6 +395,7 @@ #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) @@ -1128,6 +1129,7 @@ enum punit_power_well { #define GEN6_VERSYNC (RING_SYNC_1(VEBOX_RING_BASE)) #define GEN6_VEVSYNC (RING_SYNC_2(VEBOX_RING_BASE)) #define GEN6_NOSYNC 0 +#define RING_PSMI_CTL(base) ((base)+0x50) #define RING_MAX_IDLE(base) ((base)+0x54) #define RING_HWS_PGA(base) ((base)+0x80) #define RING_HWS_PGA_GEN6(base) ((base)+0x2080) @@ -1458,6 +1460,7 @@ enum punit_power_well { #define GEN6_BLITTER_FBC_NOTIFY (1<<3) #define GEN6_RC_SLEEP_PSMI_CONTROL 0x2050 +#define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0) #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fb3e3d429191..e2af1383b179 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13057,11 +13057,7 @@ static void i915_disable_vga(struct drm_device *dev) vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); - /* - * Fujitsu-Siemens Lifebook S6010 (830) has problems resuming - * from S3 without preserving (some of?) the other bits. - */ - I915_WRITE(vga_reg, dev_priv->bios_vgacntr | VGA_DISP_DISABLE); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } @@ -13146,8 +13142,6 @@ void intel_modeset_init(struct drm_device *dev) intel_shared_dpll_init(dev); - /* save the BIOS value before clobbering it */ - dev_priv->bios_vgacntr = I915_READ(i915_vgacntrl_reg(dev)); /* Just disable it once at startup */ i915_disable_vga(dev); intel_setup_outputs(dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1f4b56e273c8..964b28e3c630 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6191,6 +6191,20 @@ void intel_cleanup_gt_powersave(struct drm_device *dev) valleyview_cleanup_gt_powersave(dev); } +static void gen6_suspend_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + flush_delayed_work(&dev_priv->rps.delayed_resume_work); + + /* + * TODO: disable RPS interrupts on GEN9+ too once RPS support + * is added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_disable_rps_interrupts(dev); +} + /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev: drm device @@ -6206,14 +6220,7 @@ void intel_suspend_gt_powersave(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6) return; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - /* - * TODO: disable RPS interrupts on GEN9+ too once RPS support - * is added for it. - */ - if (INTEL_INFO(dev)->gen < 9) - gen6_disable_rps_interrupts(dev); + gen6_suspend_rps(dev); /* Force GPU to min freq during suspend */ gen6_rps_idle(dev_priv); @@ -6316,8 +6323,11 @@ void intel_reset_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + if (INTEL_INFO(dev)->gen < 6) + return; + + gen6_suspend_rps(dev); dev_priv->rps.enabled = false; - intel_enable_gt_powersave(dev); } static void ibx_init_clock_gating(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9f445e9a75d1..c7bc93d28d84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -362,12 +362,15 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; /* * TLB invalidate requires a post-sync write. */ flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index f5a78d53e297..ac6da7102fbb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -615,29 +615,6 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, vlv_power_sequencer_reset(dev_priv); } -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); - - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; - - return; - } - - if (enabled != (power_well->count > 0)) - goto mismatch; - - return; - -mismatch: - WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); -} - /** * intel_display_power_get - grab a power domain reference * @dev_priv: i915 device instance @@ -669,8 +646,6 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, power_well->ops->enable(dev_priv, power_well); power_well->hw_enabled = true; } - - check_power_well_state(dev_priv, power_well); } power_domains->domain_use_count[domain]++; @@ -709,8 +684,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, power_well->hw_enabled = false; power_well->ops->disable(dev_priv, power_well); } - - check_power_well_state(dev_priv, power_well); } mutex_unlock(&power_domains->lock); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index aa873048308b..94a5bee69fe7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -386,9 +386,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu) msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); drm_gem_object_unreference(gpu->memptrs_bo); } - if (gpu->pm4) - release_firmware(gpu->pm4); - if (gpu->pfp) - release_firmware(gpu->pfp); + release_firmware(gpu->pm4); + release_firmware(gpu->pfp); msm_gpu_cleanup(&gpu->base); } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c index fbebb0405d76..b4e70e0e3cfa 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c @@ -141,6 +141,15 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector) uint32_t hpd_ctrl; int i, ret; + for (i = 0; i < config->hpd_reg_cnt; i++) { + ret = regulator_enable(hdmi->hpd_regs[i]); + if (ret) { + dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n", + config->hpd_reg_names[i], ret); + goto fail; + } + } + ret = gpio_config(hdmi, true); if (ret) { dev_err(dev->dev, "failed to configure GPIOs: %d\n", ret); @@ -164,15 +173,6 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector) } } - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_enable(hdmi->hpd_regs[i]); - if (ret) { - dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } - } - hdmi_set_mode(hdmi, false); phy->funcs->reset(phy); hdmi_set_mode(hdmi, true); @@ -200,7 +200,7 @@ fail: return ret; } -static int hdp_disable(struct hdmi_connector *hdmi_connector) +static void hdp_disable(struct hdmi_connector *hdmi_connector) { struct hdmi *hdmi = hdmi_connector->hdmi; const struct hdmi_platform_config *config = hdmi->config; @@ -212,28 +212,19 @@ static int hdp_disable(struct hdmi_connector *hdmi_connector) hdmi_set_mode(hdmi, false); - for (i = 0; i < config->hpd_reg_cnt; i++) { - ret = regulator_disable(hdmi->hpd_regs[i]); - if (ret) { - dev_err(dev->dev, "failed to disable hpd regulator: %s (%d)\n", - config->hpd_reg_names[i], ret); - goto fail; - } - } - for (i = 0; i < config->hpd_clk_cnt; i++) clk_disable_unprepare(hdmi->hpd_clks[i]); ret = gpio_config(hdmi, false); - if (ret) { - dev_err(dev->dev, "failed to unconfigure GPIOs: %d\n", ret); - goto fail; - } - - return 0; + if (ret) + dev_warn(dev->dev, "failed to unconfigure GPIOs: %d\n", ret); -fail: - return ret; + for (i = 0; i < config->hpd_reg_cnt; i++) { + ret = regulator_disable(hdmi->hpd_regs[i]); + if (ret) + dev_warn(dev->dev, "failed to disable hpd regulator: %s (%d)\n", + config->hpd_reg_names[i], ret); + } } static void @@ -260,11 +251,11 @@ void hdmi_connector_irq(struct drm_connector *connector) (hpd_int_status & HDMI_HPD_INT_STATUS_INT)) { bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED); - DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl); - - /* ack the irq: */ + /* ack & disable (temporarily) HPD events: */ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, - hpd_int_ctrl | HDMI_HPD_INT_CTRL_INT_ACK); + HDMI_HPD_INT_CTRL_INT_ACK); + + DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl); /* detect disconnect if we are connected or visa versa: */ hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index a7672e100d8b..3449213f1e76 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -331,17 +331,8 @@ static int mdp4_crtc_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct drm_device *dev = crtc->dev; - DBG("%s: check", mdp4_crtc->name); - - if (mdp4_crtc->event) { - dev_err(dev->dev, "already pending flip!\n"); - return -EBUSY; - } - // TODO anything else to check? - return 0; } @@ -357,7 +348,7 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; unsigned long flags; - DBG("%s: flush", mdp4_crtc->name); + DBG("%s: event: %p", mdp4_crtc->name, crtc->state->event); WARN_ON(mdp4_crtc->event); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 0e9a2e3a82d7..f021f960a8a2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -303,11 +303,6 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, DBG("%s: check", mdp5_crtc->name); - if (mdp5_crtc->event) { - dev_err(dev->dev, "already pending flip!\n"); - return -EBUSY; - } - /* request a free CTL, if none is already allocated for this CRTC */ if (state->enable && !mdp5_crtc->ctl) { mdp5_crtc->ctl = mdp5_ctlm_request(mdp5_kms->ctlm, crtc); @@ -364,7 +359,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; unsigned long flags; - DBG("%s: flush", mdp5_crtc->name); + DBG("%s: event: %p", mdp5_crtc->name, crtc->state->event); WARN_ON(mdp5_crtc->event); @@ -460,10 +455,7 @@ void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, /* now that we know what irq's we want: */ mdp5_crtc->err.irqmask = intf2err(intf); mdp5_crtc->vblank.irqmask = intf2vblank(intf); - - /* when called from modeset_init(), skip the rest until later: */ - if (!mdp5_kms) - return; + mdp_irq_update(&mdp5_kms->base); spin_lock_irqsave(&mdp5_kms->resource_lock, flags); intf_sel = mdp5_read(mdp5_kms, REG_MDP5_DISP_INTF_SEL); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index a11f1b80c488..9f01a4f21af2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -216,17 +216,7 @@ static int modeset_init(struct mdp5_kms *mdp5_kms) goto fail; } - /* NOTE: the vsync and error irq's are actually associated with - * the INTF/encoder.. the easiest way to deal with this (ie. what - * we do now) is assume a fixed relationship between crtc's and - * encoders. I'm not sure if there is ever a need to more freely - * assign crtcs to encoders, but if there is then we need to take - * care of error and vblank irq's that the crtc has registered, - * and also update user-requested vblank_mask. - */ - encoder->possible_crtcs = BIT(0); - mdp5_crtc_set_intf(priv->crtcs[0], 3, INTF_HDMI); - + encoder->possible_crtcs = (1 << priv->num_crtcs) - 1;; priv->encoders[priv->num_encoders++] = encoder; /* Construct bridge/connector for HDMI: */ diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.c b/drivers/gpu/drm/msm/mdp/mdp_kms.c index 03455b64a245..2a731722d840 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp_kms.c @@ -42,7 +42,10 @@ static void update_irq(struct mdp_kms *mdp_kms) mdp_kms->funcs->set_irqmask(mdp_kms, irqmask); } -static void update_irq_unlocked(struct mdp_kms *mdp_kms) +/* if an mdp_irq's irqmask has changed, such as when mdp5 crtc<->encoder + * link changes, this must be called to figure out the new global irqmask + */ +void mdp_irq_update(struct mdp_kms *mdp_kms) { unsigned long flags; spin_lock_irqsave(&list_lock, flags); @@ -122,7 +125,7 @@ void mdp_irq_register(struct mdp_kms *mdp_kms, struct mdp_irq *irq) spin_unlock_irqrestore(&list_lock, flags); if (needs_update) - update_irq_unlocked(mdp_kms); + mdp_irq_update(mdp_kms); } void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq) @@ -141,5 +144,5 @@ void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq) spin_unlock_irqrestore(&list_lock, flags); if (needs_update) - update_irq_unlocked(mdp_kms); + mdp_irq_update(mdp_kms); } diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.h b/drivers/gpu/drm/msm/mdp/mdp_kms.h index 99557b5ad4fd..b268ce95d394 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp_kms.h @@ -75,7 +75,7 @@ void mdp_update_vblank_mask(struct mdp_kms *mdp_kms, uint32_t mask, bool enable) void mdp_irq_wait(struct mdp_kms *mdp_kms, uint32_t irqmask); void mdp_irq_register(struct mdp_kms *mdp_kms, struct mdp_irq *irq); void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq); - +void mdp_irq_update(struct mdp_kms *mdp_kms); /* * pixel format helpers: diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index f0de412e13dc..191968256c58 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -23,10 +23,41 @@ struct msm_commit { struct drm_atomic_state *state; uint32_t fence; struct msm_fence_cb fence_cb; + uint32_t crtc_mask; }; static void fence_cb(struct msm_fence_cb *cb); +/* block until specified crtcs are no longer pending update, and + * atomically mark them as pending update + */ +static int start_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) +{ + int ret; + + spin_lock(&priv->pending_crtcs_event.lock); + ret = wait_event_interruptible_locked(priv->pending_crtcs_event, + !(priv->pending_crtcs & crtc_mask)); + if (ret == 0) { + DBG("start: %08x", crtc_mask); + priv->pending_crtcs |= crtc_mask; + } + spin_unlock(&priv->pending_crtcs_event.lock); + + return ret; +} + +/* clear specified crtcs (no longer pending update) + */ +static void end_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) +{ + spin_lock(&priv->pending_crtcs_event.lock); + DBG("end: %08x", crtc_mask); + priv->pending_crtcs &= ~crtc_mask; + wake_up_all_locked(&priv->pending_crtcs_event); + spin_unlock(&priv->pending_crtcs_event.lock); +} + static struct msm_commit *new_commit(struct drm_atomic_state *state) { struct msm_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); @@ -58,12 +89,27 @@ static void complete_commit(struct msm_commit *c) drm_atomic_helper_commit_post_planes(dev, state); + /* NOTE: _wait_for_vblanks() only waits for vblank on + * enabled CRTCs. So we end up faulting when disabling + * due to (potentially) unref'ing the outgoing fb's + * before the vblank when the disable has latched. + * + * But if it did wait on disabled (or newly disabled) + * CRTCs, that would be racy (ie. we could have missed + * the irq. We need some way to poll for pipe shut + * down. Or just live with occasionally hitting the + * timeout in the CRTC disable path (which really should + * not be critical path) + */ + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); drm_atomic_state_free(state); + end_atomic(dev->dev_private, c->crtc_mask); + kfree(c); } @@ -97,8 +143,9 @@ static void add_fb(struct msm_commit *c, struct drm_framebuffer *fb) int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool async) { - struct msm_commit *c; int nplanes = dev->mode_config.num_total_plane; + int ncrtcs = dev->mode_config.num_crtc; + struct msm_commit *c; int i, ret; ret = drm_atomic_helper_prepare_planes(dev, state); @@ -106,6 +153,18 @@ int msm_atomic_commit(struct drm_device *dev, return ret; c = new_commit(state); + if (!c) + return -ENOMEM; + + /* + * Figure out what crtcs we have: + */ + for (i = 0; i < ncrtcs; i++) { + struct drm_crtc *crtc = state->crtcs[i]; + if (!crtc) + continue; + c->crtc_mask |= (1 << drm_crtc_index(crtc)); + } /* * Figure out what fence to wait for: @@ -122,6 +181,14 @@ int msm_atomic_commit(struct drm_device *dev, } /* + * Wait for pending updates on any of the same crtc's and then + * mark our set of crtc's as busy: + */ + ret = start_atomic(dev->dev_private, c->crtc_mask); + if (ret) + return ret; + + /* * This is the point of no return - everything below never fails except * when the hw goes bonghits. Which means we can commit the new state on * the software side now. diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c795217e1bfc..9a61546a0b05 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -193,6 +193,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) priv->wq = alloc_ordered_workqueue("msm", 0); init_waitqueue_head(&priv->fence_event); + init_waitqueue_head(&priv->pending_crtcs_event); INIT_LIST_HEAD(&priv->inactive_list); INIT_LIST_HEAD(&priv->fence_cbs); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 136303818436..b69ef2d5a26c 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -96,6 +96,10 @@ struct msm_drm_private { /* callbacks deferred until bo is inactive: */ struct list_head fence_cbs; + /* crtcs pending async atomic updates: */ + uint32_t pending_crtcs; + wait_queue_head_t pending_crtcs_event; + /* registered MMUs: */ unsigned int num_mmus; struct msm_mmu *mmus[NUM_DOMAINS]; diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 94d55e526b4e..1f3af13ccede 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -190,8 +190,7 @@ fail_unlock: fail: if (ret) { - if (fbi) - framebuffer_release(fbi); + framebuffer_release(fbi); if (fb) { drm_framebuffer_unregister_private(fb); drm_framebuffer_remove(fb); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 4a6f0e49d5b5..49dea4fb55ac 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -535,8 +535,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) drm_free_large(msm_obj->pages); } else { - if (msm_obj->vaddr) - vunmap(msm_obj->vaddr); + vunmap(msm_obj->vaddr); put_pages(obj); } diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c index ff2b434b3db4..760947e380c9 100644 --- a/drivers/gpu/drm/nouveau/core/core/event.c +++ b/drivers/gpu/drm/nouveau/core/core/event.c @@ -26,7 +26,7 @@ void nvkm_event_put(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (--event->refs[index * event->types_nr + type] == 0) { @@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event, u32 types, int index) void nvkm_event_get(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (++event->refs[index * event->types_nr + type] == 1) { diff --git a/drivers/gpu/drm/nouveau/core/core/notify.c b/drivers/gpu/drm/nouveau/core/core/notify.c index d1bcde55e9d7..839a32577680 100644 --- a/drivers/gpu/drm/nouveau/core/core/notify.c +++ b/drivers/gpu/drm/nouveau/core/core/notify.c @@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *notify, void *data, u32 size) struct nvkm_event *event = notify->event; unsigned long flags; - BUG_ON(!spin_is_locked(&event->list_lock)); + assert_spin_locked(&event->list_lock); BUG_ON(size != notify->size); spin_lock_irqsave(&event->refs_lock, flags); diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c index 674da1f095b2..732922690653 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c @@ -249,6 +249,39 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass; break; + case 0x106: + device->cname = "GK208B"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = nve0_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = nve0_i2c_oclass; + device->oclass[NVDEV_SUBDEV_FUSE ] = &gf100_fuse_oclass; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &nve0_clock_oclass; + device->oclass[NVDEV_SUBDEV_THERM ] = &nvd0_therm_oclass; + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = nvc0_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = gk20a_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = nvc0_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = nve0_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTC ] = gk104_ltc_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &nve0_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_SUBDEV_PWR ] = nv108_pwr_oclass; + device->oclass[NVDEV_SUBDEV_VOLT ] = &nv40_volt_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = nv108_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = nv108_graph_oclass; + device->oclass[NVDEV_ENGINE_DISP ] = nvf0_disp_oclass; + device->oclass[NVDEV_ENGINE_COPY0 ] = &nve0_copy0_oclass; + device->oclass[NVDEV_ENGINE_COPY1 ] = &nve0_copy1_oclass; + device->oclass[NVDEV_ENGINE_COPY2 ] = &nve0_copy2_oclass; + device->oclass[NVDEV_ENGINE_BSP ] = &nve0_bsp_oclass; + device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; + device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; + break; case 0x108: device->cname = "GK208"; device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c index 5e58bba0dd5c..a7a890fad1e5 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c @@ -44,8 +44,10 @@ static void pramin_fini(void *data) { struct priv *priv = data; - nv_wr32(priv->bios, 0x001700, priv->bar0); - kfree(priv); + if (priv) { + nv_wr32(priv->bios, 0x001700, priv->bar0); + kfree(priv); + } } static void * diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index 00f2ca7e44a5..033a8e999497 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -24,34 +24,71 @@ #include "nv50.h" +struct nvaa_ram_priv { + struct nouveau_ram base; + u64 poller_base; +}; + static int nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 datasize, struct nouveau_object **pobject) { - const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ - const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 rsvd_head = ( 256 * 1024); /* vga memory */ + u32 rsvd_tail = (1024 * 1024); /* vbios etc */ struct nouveau_fb *pfb = nouveau_fb(parent); - struct nouveau_ram *ram; + struct nvaa_ram_priv *priv; int ret; - ret = nouveau_ram_create(parent, engine, oclass, &ram); - *pobject = nv_object(ram); + ret = nouveau_ram_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); if (ret) return ret; - ram->size = nv_rd32(pfb, 0x10020c); - ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32); + priv->base.type = NV_MEM_TYPE_STOLEN; + priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; + priv->base.size = (u64)nv_rd32(pfb, 0x100e14) << 12; - ret = nouveau_mm_init(&pfb->vram, rsvd_head, (ram->size >> 12) - - (rsvd_head + rsvd_tail), 1); + rsvd_tail += 0x1000; + priv->poller_base = priv->base.size - rsvd_tail; + + ret = nouveau_mm_init(&pfb->vram, rsvd_head >> 12, + (priv->base.size - (rsvd_head + rsvd_tail)) >> 12, + 1); if (ret) return ret; - ram->type = NV_MEM_TYPE_STOLEN; - ram->stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; - ram->get = nv50_ram_get; - ram->put = nv50_ram_put; + priv->base.get = nv50_ram_get; + priv->base.put = nv50_ram_put; + return 0; +} + +static int +nvaa_ram_init(struct nouveau_object *object) +{ + struct nouveau_fb *pfb = nouveau_fb(object); + struct nvaa_ram_priv *priv = (void *)object; + int ret; + u64 dniso, hostnb, flush; + + ret = nouveau_ram_init(&priv->base); + if (ret) + return ret; + + dniso = ((priv->base.size - (priv->poller_base + 0x00)) >> 5) - 1; + hostnb = ((priv->base.size - (priv->poller_base + 0x20)) >> 5) - 1; + flush = ((priv->base.size - (priv->poller_base + 0x40)) >> 5) - 1; + + /* Enable NISO poller for various clients and set their associated + * read address, only for MCP77/78 and MCP79/7A. (fd#25701) + */ + nv_wr32(pfb, 0x100c18, dniso); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000001); + nv_wr32(pfb, 0x100c1c, hostnb); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000002); + nv_wr32(pfb, 0x100c24, flush); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00010000); + return 0; } @@ -60,7 +97,7 @@ nvaa_ram_oclass = { .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvaa_ram_ctor, .dtor = _nouveau_ram_dtor, - .init = _nouveau_ram_init, + .init = nvaa_ram_init, .fini = _nouveau_ram_fini, }, }; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c index a75c35ccf25c..165401c4045c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -24,13 +24,6 @@ #include "nv04.h" -static void -nv4c_mc_msi_rearm(struct nouveau_mc *pmc) -{ - struct nv04_mc_priv *priv = (void *)pmc; - nv_wr08(priv, 0x088050, 0xff); -} - struct nouveau_oclass * nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .base.handle = NV_SUBDEV(MC, 0x4c), @@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .fini = _nouveau_mc_fini, }, .intr = nv04_mc_intr, - .msi_rearm = nv4c_mc_msi_rearm, }.base; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 21ec561edc99..bba2960d3dfb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1572,8 +1572,10 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) * so use the DMA API for them. */ if (!nv_device_is_cpu_coherent(device) && - ttm->caching_state == tt_uncached) + ttm->caching_state == tt_uncached) { ttm_dma_unpopulate(ttm_dma, dev->dev); + return; + } #if __OS_HAS_AGP if (drm->agp.stat == ENABLED) { diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 5d93902a91ab..f8042433752b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -876,7 +876,6 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev, if (ret) return ret; - bo->gem.dumb = true; ret = drm_gem_handle_create(file_priv, &bo->gem, &args->handle); drm_gem_object_unreference_unlocked(&bo->gem); return ret; @@ -892,14 +891,6 @@ nouveau_display_dumb_map_offset(struct drm_file *file_priv, gem = drm_gem_object_lookup(dev, file_priv, handle); if (gem) { struct nouveau_bo *bo = nouveau_gem_object(gem); - - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(!(gem->dumb || gem->import_attach), - "Illegal dumb map of accelerated buffer.\n"); - *poffset = drm_vma_node_offset_addr(&bo->bo.vma_node); drm_gem_object_unreference_unlocked(gem); return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 28d51a22a4bf..bf0f9e21d714 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -36,7 +36,14 @@ void nouveau_gem_object_del(struct drm_gem_object *gem) { struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; + struct device *dev = drm->dev->dev; + int ret; + + ret = pm_runtime_get_sync(dev); + if (WARN_ON(ret < 0 && ret != -EACCES)) + return; if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); @@ -46,6 +53,9 @@ nouveau_gem_object_del(struct drm_gem_object *gem) /* reset filp so nouveau_bo_del_ttm() can test for it */ gem->filp = NULL; ttm_bo_unref(&bo); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } int @@ -53,7 +63,9 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct nouveau_vma *vma; + struct device *dev = drm->dev->dev; int ret; if (!cli->vm) @@ -71,11 +83,16 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) goto out; } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) { + if (ret) kfree(vma); - goto out; - } + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } else { vma->refcount++; } @@ -129,6 +146,8 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); + struct device *dev = drm->dev->dev; struct nouveau_vma *vma; int ret; @@ -141,8 +160,14 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) vma = nouveau_bo_vma_find(nvbo, cli->vm); if (vma) { - if (--vma->refcount == 0) - nouveau_gem_object_unmap(nvbo, vma); + if (--vma->refcount == 0) { + ret = pm_runtime_get_sync(dev); + if (!WARN_ON(ret < 0 && ret != -EACCES)) { + nouveau_gem_object_unmap(nvbo, vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + } + } } ttm_bo_unreserve(&nvbo->bo); } @@ -444,9 +469,6 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, list_for_each_entry(nvbo, list, entry) { struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index]; - WARN_ONCE(nvbo->gem.dumb, - "GPU use of dumb buffer is illegal.\n"); - ret = nouveau_gem_set_domain(&nvbo->gem, b->read_domains, b->write_domains, b->valid_domains); diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 753a6def61e7..3d1cfcb96b6b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -28,6 +28,7 @@ #include "nouveau_ttm.h" #include "nouveau_gem.h" +#include "drm_legacy.h" static int nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { @@ -281,7 +282,7 @@ nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma) struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev); if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) - return -EINVAL; + return drm_legacy_mmap(filp, vma); return ttm_bo_mmap(filp, vma, &drm->ttm.bdev); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index d59ec491dbb9..ed644a4f6f57 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1851,10 +1851,9 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return pll; } /* otherwise, pick one of the plls */ - if ((rdev->family == CHIP_KAVERI) || - (rdev->family == CHIP_KABINI) || + if ((rdev->family == CHIP_KABINI) || (rdev->family == CHIP_MULLINS)) { - /* KB/KV/ML has PPLL1 and PPLL2 */ + /* KB/ML has PPLL1 and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -1863,7 +1862,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { - /* CI has PPLL0, PPLL1, and PPLL2 */ + /* CI/KV has PPLL0, PPLL1, and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -2155,6 +2154,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL0: /* disable the ppll */ if ((rdev->family == CHIP_ARUBA) || + (rdev->family == CHIP_KAVERI) || (rdev->family == CHIP_BONAIRE) || (rdev->family == CHIP_HAWAII)) atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 11ba9d21b89b..db42a670f995 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -492,6 +492,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ba85986febea..03003f8a6de6 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2156,4 +2156,6 @@ #define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu #define ATC_VM_APERTURE1_LOW_ADDR 0x3304u +#define IH_VMID_0_LUT 0x3D40u + #endif diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index 2fe8cfc966d9..bafdf92a5732 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -103,7 +103,7 @@ static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9b42001295ba..e3e9c10cfba9 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2745,13 +2745,11 @@ int kv_dpm_init(struct radeon_device *rdev) pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; if (radeon_bapm == -1) { - /* There are stability issues reported on with - * bapm enabled on an asrock system. - */ - if (rdev->pdev->subsystem_vendor == 0x1849) - pi->bapm_enable = false; - else + /* only enable bapm on KB, ML by default */ + if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) pi->bapm_enable = true; + else + pi->bapm_enable = false; } else if (radeon_bapm == 0) { pi->bapm_enable = false; } else { diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index fe48f229043e..a46f73737994 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -394,10 +394,9 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, return r; } -static int radeon_mode_mmap(struct drm_file *filp, - struct drm_device *dev, - uint32_t handle, bool dumb, - uint64_t *offset_p) +int radeon_mode_dumb_mmap(struct drm_file *filp, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p) { struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -406,14 +405,6 @@ static int radeon_mode_mmap(struct drm_file *filp, if (gobj == NULL) { return -ENOENT; } - - /* - * We don't allow dumb mmaps on objects created using another - * interface. - */ - WARN_ONCE(dumb && !(gobj->dumb || gobj->import_attach), - "Illegal dumb map of GPU buffer.\n"); - robj = gem_to_radeon_bo(gobj); if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) { drm_gem_object_unreference_unlocked(gobj); @@ -424,20 +415,12 @@ static int radeon_mode_mmap(struct drm_file *filp, return 0; } -int radeon_mode_dumb_mmap(struct drm_file *filp, - struct drm_device *dev, - uint32_t handle, uint64_t *offset_p) -{ - return radeon_mode_mmap(filp, dev, handle, true, offset_p); -} - int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct drm_radeon_gem_mmap *args = data; - return radeon_mode_mmap(filp, dev, args->handle, false, - &args->addr_ptr); + return radeon_mode_dumb_mmap(filp, dev, args->handle, &args->addr_ptr); } int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, @@ -763,7 +746,6 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, return -ENOMEM; r = drm_gem_handle_create(file_priv, gobj, &handle); - gobj->dumb = true; /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 065d02068ec3..8bf87f1203cc 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -28,6 +28,8 @@ #include "cikd.h" #include "cik_reg.h" #include "radeon_kfd.h" +#include "radeon_ucode.h" +#include <linux/firmware.h> #define CIK_PIPE_PER_MEC (4) @@ -49,6 +51,7 @@ static uint64_t get_vmem_size(struct kgd_dev *kgd); static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* * Register access functions @@ -69,7 +72,7 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, @@ -89,14 +92,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_memory = kgd_init_memory, .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, - .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_destroy = kgd_hqd_destroy, + .get_fw_version = get_fw_version }; static const struct kgd2kfd_calls *kgd2kfd; bool radeon_kfd_init(void) { +#if defined(CONFIG_HSA_AMD_MODULE) bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*, const struct kgd2kfd_calls**); @@ -113,6 +118,17 @@ bool radeon_kfd_init(void) } return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) { + kgd2kfd = NULL; + + return false; + } + + return true; +#else + return false; +#endif } void radeon_kfd_fini(void) @@ -374,6 +390,10 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, cpu_relax(); write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + /* Mapping vmid to pasid also for IH block */ + write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), + pasid_mapping); + return 0; } @@ -513,7 +533,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { uint32_t act; @@ -552,6 +572,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, if (timeout == 0) { pr_err("kfd: cp queue preemption time out (%dms)\n", temp); + release_queue(kgd); return -ETIME; } msleep(20); @@ -561,3 +582,52 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, release_queue(kgd); return 0; } + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + const union radeon_firmware_header *hdr; + + BUG_ON(kgd == NULL || rdev->mec_fw == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union radeon_firmware_header *) rdev->me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union radeon_firmware_header *) rdev->ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union radeon_firmware_header *) rdev->mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union radeon_firmware_header *) + rdev->mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data; + break; + + case KGD_ENGINE_SDMA: + hdr = (const union radeon_firmware_header *) + rdev->sdma_fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7d68223eb469..86fc56434b28 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -529,9 +529,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev, u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); - WARN_ONCE(bo->gem_base.dumb, - "GPU use of dumb buffer is illegal.\n"); - /* Check if this buffer will be moved and don't move it * if we have moved too many buffers for this IB already. * diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 535403e0c8a2..15aee723db77 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1703,7 +1703,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev, u32 format; u32 *buffer; const u8 __user *data; - int size, dwords, tex_width, blit_width, spitch; + unsigned int size, dwords, tex_width, blit_width, spitch; u32 height; int i; u32 texpitch, microtile; diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 3367960286a6..978993fa3a36 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -168,7 +168,7 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, const struct tegra_dc_window *window) { unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp; - unsigned long value; + unsigned long value, flags; bool yuv, planar; /* @@ -181,6 +181,8 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, else bpp = planar ? 1 : 2; + spin_lock_irqsave(&dc->lock, flags); + value = WINDOW_A_SELECT << index; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); @@ -273,6 +275,7 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, case TEGRA_BO_TILING_MODE_BLOCK: DRM_ERROR("hardware doesn't support block linear mode\n"); + spin_unlock_irqrestore(&dc->lock, flags); return -EINVAL; } @@ -331,6 +334,8 @@ static int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index, tegra_dc_window_commit(dc, index); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -338,11 +343,14 @@ static int tegra_window_plane_disable(struct drm_plane *plane) { struct tegra_dc *dc = to_tegra_dc(plane->crtc); struct tegra_plane *p = to_tegra_plane(plane); + unsigned long flags; u32 value; if (!plane->crtc) return 0; + spin_lock_irqsave(&dc->lock, flags); + value = WINDOW_A_SELECT << p->index; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); @@ -352,6 +360,8 @@ static int tegra_window_plane_disable(struct drm_plane *plane) tegra_dc_window_commit(dc, p->index); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -699,14 +709,16 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, struct tegra_bo *bo = tegra_fb_get_plane(fb, 0); unsigned int h_offset = 0, v_offset = 0; struct tegra_bo_tiling tiling; + unsigned long value, flags; unsigned int format, swap; - unsigned long value; int err; err = tegra_fb_get_tiling(fb, &tiling); if (err < 0) return err; + spin_lock_irqsave(&dc->lock, flags); + tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER); value = fb->offsets[0] + y * fb->pitches[0] + @@ -752,6 +764,7 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, case TEGRA_BO_TILING_MODE_BLOCK: DRM_ERROR("hardware doesn't support block linear mode\n"); + spin_unlock_irqrestore(&dc->lock, flags); return -EINVAL; } @@ -778,6 +791,8 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y, tegra_dc_writel(dc, value << 8, DC_CMD_STATE_CONTROL); tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + spin_unlock_irqrestore(&dc->lock, flags); + return 0; } @@ -814,23 +829,32 @@ static void tegra_dc_finish_page_flip(struct tegra_dc *dc) unsigned long flags, base; struct tegra_bo *bo; - if (!dc->event) + spin_lock_irqsave(&drm->event_lock, flags); + + if (!dc->event) { + spin_unlock_irqrestore(&drm->event_lock, flags); return; + } bo = tegra_fb_get_plane(crtc->primary->fb, 0); + spin_lock_irqsave(&dc->lock, flags); + /* check if new start address has been latched */ + tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER); tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR); tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS); + spin_unlock_irqrestore(&dc->lock, flags); + if (base == bo->paddr + crtc->primary->fb->offsets[0]) { - spin_lock_irqsave(&drm->event_lock, flags); - drm_send_vblank_event(drm, dc->pipe, dc->event); - drm_vblank_put(drm, dc->pipe); + drm_crtc_send_vblank_event(crtc, dc->event); + drm_crtc_vblank_put(crtc); dc->event = NULL; - spin_unlock_irqrestore(&drm->event_lock, flags); } + + spin_unlock_irqrestore(&drm->event_lock, flags); } void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) @@ -843,7 +867,7 @@ void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) if (dc->event && dc->event->base.file_priv == file) { dc->event->base.destroy(&dc->event->base); - drm_vblank_put(drm, dc->pipe); + drm_crtc_vblank_put(crtc); dc->event = NULL; } @@ -853,16 +877,16 @@ void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t page_flip_flags) { + unsigned int pipe = drm_crtc_index(crtc); struct tegra_dc *dc = to_tegra_dc(crtc); - struct drm_device *drm = crtc->dev; if (dc->event) return -EBUSY; if (event) { - event->pipe = dc->pipe; + event->pipe = pipe; dc->event = event; - drm_vblank_get(drm, dc->pipe); + drm_crtc_vblank_get(crtc); } tegra_dc_set_base(dc, 0, 0, fb); @@ -1127,7 +1151,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); */ - drm_handle_vblank(dc->base.dev, dc->pipe); + drm_crtc_handle_vblank(&dc->base); tegra_dc_finish_page_flip(dc); } diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index e549afeece1f..d4f827593dfa 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -694,24 +694,28 @@ static const struct file_operations tegra_drm_fops = { .llseek = noop_llseek, }; -static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe) +static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, + unsigned int pipe) { struct drm_crtc *crtc; list_for_each_entry(crtc, &drm->mode_config.crtc_list, head) { - struct tegra_dc *dc = to_tegra_dc(crtc); - - if (dc->pipe == pipe) + if (pipe == drm_crtc_index(crtc)) return crtc; } return NULL; } -static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc) +static u32 tegra_drm_get_vblank_counter(struct drm_device *drm, int pipe) { + struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe); + + if (!crtc) + return 0; + /* TODO: implement real hardware counter using syncpoints */ - return drm_vblank_count(dev, crtc); + return drm_crtc_vblank_count(crtc); } static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index da32086cbeaf..8777b7f75791 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -216,32 +216,58 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo) } } -static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo, - size_t size) +static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo) { + struct scatterlist *s; + struct sg_table *sgt; + unsigned int i; + bo->pages = drm_gem_get_pages(&bo->gem); if (IS_ERR(bo->pages)) return PTR_ERR(bo->pages); - bo->num_pages = size >> PAGE_SHIFT; - - bo->sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages); - if (IS_ERR(bo->sgt)) { - drm_gem_put_pages(&bo->gem, bo->pages, false, false); - return PTR_ERR(bo->sgt); + bo->num_pages = bo->gem.size >> PAGE_SHIFT; + + sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages); + if (IS_ERR(sgt)) + goto put_pages; + + /* + * Fake up the SG table so that dma_map_sg() can be used to flush the + * pages associated with it. Note that this relies on the fact that + * the DMA API doesn't hook into IOMMU on Tegra, therefore mapping is + * only cache maintenance. + * + * TODO: Replace this by drm_clflash_sg() once it can be implemented + * without relying on symbols that are not exported. + */ + for_each_sg(sgt->sgl, s, sgt->nents, i) + sg_dma_address(s) = sg_phys(s); + + if (dma_map_sg(drm->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE) == 0) { + sgt = ERR_PTR(-ENOMEM); + goto release_sgt; } + bo->sgt = sgt; + return 0; + +release_sgt: + sg_free_table(sgt); + kfree(sgt); +put_pages: + drm_gem_put_pages(&bo->gem, bo->pages, false, false); + return PTR_ERR(sgt); } -static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo, - size_t size) +static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo) { struct tegra_drm *tegra = drm->dev_private; int err; if (tegra->domain) { - err = tegra_bo_get_pages(drm, bo, size); + err = tegra_bo_get_pages(drm, bo); if (err < 0) return err; @@ -251,6 +277,8 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo, return err; } } else { + size_t size = bo->gem.size; + bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr, GFP_KERNEL | __GFP_NOWARN); if (!bo->vaddr) { @@ -274,7 +302,7 @@ struct tegra_bo *tegra_bo_create(struct drm_device *drm, size_t size, if (IS_ERR(bo)) return bo; - err = tegra_bo_alloc(drm, bo, size); + err = tegra_bo_alloc(drm, bo); if (err < 0) goto release; |