summaryrefslogtreecommitdiffstats
path: root/drivers/vdpa/vdpa_user
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 17:05:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-27 17:05:34 -0700
commit8ccd54fe45713cd458015b5b08d6098545e70543 (patch)
tree12a034b2ee42d681b2e915815c4529d6f246bcc4 /drivers/vdpa/vdpa_user
parent0835b5ee8704aef4e19b369237a762c52c7b6fb1 (diff)
parentc82729e06644f4e087f5ff0f91b8fb15e03b8890 (diff)
downloadlinux-stable-8ccd54fe45713cd458015b5b08d6098545e70543.tar.gz
linux-stable-8ccd54fe45713cd458015b5b08d6098545e70543.tar.bz2
linux-stable-8ccd54fe45713cd458015b5b08d6098545e70543.zip
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: "virtio,vhost,vdpa: features, fixes, and cleanups: - reduction in interrupt rate in virtio - perf improvement for VDUSE - scalability for vhost-scsi - non power of 2 ring support for packed rings - better management for mlx5 vdpa - suspend for snet - VIRTIO_F_NOTIFICATION_DATA - shared backend with vdpa-sim-blk - user VA support in vdpa-sim - better struct packing for virtio and fixes, cleanups all over the place" * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (52 commits) vhost_vdpa: fix unmap process in no-batch mode MAINTAINERS: make me a reviewer of VIRTIO CORE AND NET DRIVERS tools/virtio: fix build caused by virtio_ring changes virtio_ring: add a struct device forward declaration vdpa_sim_blk: support shared backend vdpa_sim: move buffer allocation in the devices vdpa/snet: use likely/unlikely macros in hot functions vdpa/snet: implement kick_vq_with_data callback virtio-vdpa: add VIRTIO_F_NOTIFICATION_DATA feature support virtio: add VIRTIO_F_NOTIFICATION_DATA feature support vdpa/snet: support the suspend vDPA callback vdpa/snet: support getting and setting VQ state MAINTAINERS: add vringh.h to Virtio Core and Net Drivers vringh: address kdoc warnings vdpa: address kdoc warnings virtio_ring: don't update event idx on get_buf vdpa_sim: add support for user VA vdpa_sim: replace the spinlock with a mutex to protect the state vdpa_sim: use kthread worker vdpa_sim: make devices agnostic for work management ...
Diffstat (limited to 'drivers/vdpa/vdpa_user')
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c414
1 files changed, 351 insertions, 63 deletions
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index c421b83f6fa2..de97e38c3b82 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -37,10 +37,15 @@
#define DRV_LICENSE "GPL v2"
#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
+#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
-#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+/* 128 MB reserved for virtqueue creation */
+#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+#define IRQ_UNBOUND -1
+
struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +62,9 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
+ struct cpumask irq_affinity;
+ struct kobject kobj;
};
struct vduse_dev;
@@ -76,7 +84,7 @@ struct vduse_umem {
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
- struct vduse_virtqueue *vqs;
+ struct vduse_virtqueue **vqs;
struct vduse_iova_domain *domain;
char *name;
struct mutex lock;
@@ -106,6 +114,8 @@ struct vduse_dev {
u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
+ unsigned int bounce_size;
+ struct mutex domain_lock;
};
struct vduse_dev_msg {
@@ -128,6 +138,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -419,7 +430,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
struct vduse_iova_domain *domain = dev->domain;
/* The coherent mappings are handled in vduse_dev_free_coherent() */
- if (domain->bounce_map)
+ if (domain && domain->bounce_map)
vduse_domain_reset_bounce_map(domain);
down_write(&dev->rwsem);
@@ -434,7 +445,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
flush_work(&dev->inject);
for (i = 0; i < dev->vq_num; i++) {
- struct vduse_virtqueue *vq = &dev->vqs[i];
+ struct vduse_virtqueue *vq = dev->vqs[i];
vq->ready = false;
vq->desc_addr = 0;
@@ -453,6 +464,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
spin_lock(&vq->irq_lock);
vq->cb.callback = NULL;
vq->cb.private = NULL;
+ vq->cb.trigger = NULL;
spin_unlock(&vq->irq_lock);
flush_work(&vq->inject);
flush_work(&vq->kick);
@@ -466,7 +478,7 @@ static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 device_area)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->desc_addr = desc_area;
vq->driver_addr = driver_area;
@@ -500,7 +512,7 @@ static void vduse_vq_kick_work(struct work_struct *work)
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (!eventfd_signal_allowed()) {
schedule_work(&vq->kick);
@@ -513,18 +525,19 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
struct vdpa_callback *cb)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
spin_lock(&vq->irq_lock);
vq->cb.callback = cb->callback;
vq->cb.private = cb->private;
+ vq->cb.trigger = cb->trigger;
spin_unlock(&vq->irq_lock);
}
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->num = num;
}
@@ -533,7 +546,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
u16 idx, bool ready)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->ready = ready;
}
@@ -541,7 +554,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
return vq->ready;
}
@@ -550,7 +563,7 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
const struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
vq->state.packed.last_avail_counter =
@@ -569,7 +582,7 @@ static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
@@ -624,8 +637,8 @@ static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (num_max < dev->vqs[i].num_max)
- num_max = dev->vqs[i].num_max;
+ if (num_max < dev->vqs[i]->num_max)
+ num_max = dev->vqs[i]->num_max;
return num_max;
}
@@ -708,6 +721,23 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
return dev->generation;
}
+static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
+ const struct cpumask *cpu_mask)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+ return 0;
+}
+
+static const struct cpumask *
+vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return &dev->vqs[idx]->irq_affinity;
+}
+
static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
@@ -758,6 +788,8 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_vq_affinity = vduse_vdpa_set_vq_affinity,
+ .get_vq_affinity = vduse_vdpa_get_vq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
.free = vduse_vdpa_free,
@@ -863,7 +895,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
return -EINVAL;
index = array_index_nospec(eventfd->index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
if (eventfd->fd >= 0) {
ctx = eventfd_ctx_fdget(eventfd->fd);
if (IS_ERR(ctx))
@@ -889,7 +921,7 @@ static bool vduse_dev_is_ready(struct vduse_dev *dev)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (!dev->vqs[i].num_max)
+ if (!dev->vqs[i]->num_max)
return false;
return true;
@@ -916,8 +948,26 @@ static void vduse_vq_irq_inject(struct work_struct *work)
spin_unlock_irq(&vq->irq_lock);
}
+static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
+{
+ bool signal = false;
+
+ if (!vq->cb.trigger)
+ return false;
+
+ spin_lock_irq(&vq->irq_lock);
+ if (vq->ready && vq->cb.trigger) {
+ eventfd_signal(vq->cb.trigger, 1);
+ signal = true;
+ }
+ spin_unlock_irq(&vq->irq_lock);
+
+ return signal;
+}
+
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +976,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == IRQ_UNBOUND)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
@@ -944,6 +998,9 @@ static int vduse_dev_dereg_umem(struct vduse_dev *dev,
goto unlock;
ret = -EINVAL;
+ if (!dev->domain)
+ goto unlock;
+
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
@@ -970,7 +1027,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
unsigned long npages, lock_limit;
int ret;
- if (!dev->domain->bounce_map ||
+ if (!dev->domain || !dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
@@ -1029,6 +1086,22 @@ unlock:
return ret;
}
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+ int curr_cpu = vq->irq_effective_cpu;
+
+ while (true) {
+ curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+ if (cpu_online(curr_cpu))
+ break;
+
+ if (curr_cpu >= nr_cpu_ids)
+ curr_cpu = IRQ_UNBOUND;
+ }
+
+ vq->irq_effective_cpu = curr_cpu;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1044,7 +1117,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
struct vduse_iotlb_entry entry;
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
- struct vduse_iova_domain *domain = dev->domain;
struct file *f = NULL;
ret = -EFAULT;
@@ -1055,8 +1127,13 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (entry.start > entry.last)
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
entry.start, entry.last);
if (map) {
map_file = (struct vdpa_map_file *)map->opaque;
@@ -1066,7 +1143,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
entry.last = map->last;
entry.perm = map->perm;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
ret = -EINVAL;
if (!f)
break;
@@ -1111,7 +1189,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1130,7 +1208,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(config.index, dev->vq_num);
- dev->vqs[index].num_max = config.max_size;
+ dev->vqs[index]->num_max = config.max_size;
ret = 0;
break;
}
@@ -1148,7 +1226,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(vq_info.index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
vq_info.desc_addr = vq->desc_addr;
vq_info.driver_addr = vq->driver_addr;
vq_info.device_addr = vq->device_addr;
@@ -1197,8 +1275,14 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (index >= dev->vq_num)
break;
+ ret = 0;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
+ if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev,
+ &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
+ }
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1213,8 +1297,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(umem.reserved)))
break;
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
@@ -1228,15 +1314,15 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
-
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
- struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
@@ -1250,18 +1336,24 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(info.reserved)))
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
- if (domain->bounce_map && map->start == 0 &&
- map->last == domain->bounce_size - 1)
+ if (dev->domain->bounce_map && map->start == 0 &&
+ map->last == dev->domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
if (!map)
break;
@@ -1284,7 +1376,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;
- vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_unlock(&dev->domain_lock);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can processed after reconncection */
list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1339,6 +1434,151 @@ static const struct file_operations vduse_dev_fops = {
.llseek = noop_llseek,
};
+static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
+{
+ return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
+}
+
+static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
+ const char *buf, size_t count)
+{
+ cpumask_var_t new_value;
+ int ret;
+
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, new_value);
+ if (ret)
+ goto free_mask;
+
+ ret = -EINVAL;
+ if (!cpumask_intersects(new_value, cpu_online_mask))
+ goto free_mask;
+
+ cpumask_copy(&vq->irq_affinity, new_value);
+ ret = count;
+free_mask:
+ free_cpumask_var(new_value);
+ return ret;
+}
+
+struct vq_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
+ ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
+ size_t count);
+};
+
+static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
+
+static struct attribute *vq_attrs[] = {
+ &irq_cb_affinity_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vq);
+
+static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->show)
+ return -EIO;
+
+ return entry->show(vq, buf);
+}
+
+static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->store)
+ return -EIO;
+
+ return entry->store(vq, buf, count);
+}
+
+static const struct sysfs_ops vq_sysfs_ops = {
+ .show = vq_attr_show,
+ .store = vq_attr_store,
+};
+
+static void vq_release(struct kobject *kobj)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ kfree(vq);
+}
+
+static const struct kobj_type vq_type = {
+ .release = vq_release,
+ .sysfs_ops = &vq_sysfs_ops,
+ .default_groups = vq_groups,
+};
+
+static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
+{
+ int i;
+
+ if (!dev->vqs)
+ return;
+
+ for (i = 0; i < dev->vq_num; i++)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+}
+
+static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
+{
+ int ret, i;
+
+ dev->vq_align = vq_align;
+ dev->vq_num = vq_num;
+ dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+ if (!dev->vqs)
+ return -ENOMEM;
+
+ for (i = 0; i < vq_num; i++) {
+ dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
+ if (!dev->vqs[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
+ INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
+ INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
+ spin_lock_init(&dev->vqs[i]->kick_lock);
+ spin_lock_init(&dev->vqs[i]->irq_lock);
+ cpumask_setall(&dev->vqs[i]->irq_affinity);
+
+ kobject_init(&dev->vqs[i]->kobj, &vq_type);
+ ret = kobject_add(&dev->vqs[i]->kobj,
+ &dev->dev->kobj, "vq%d", i);
+ if (ret) {
+ kfree(dev->vqs[i]);
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ while (i--)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+ dev->vqs = NULL;
+ return ret;
+}
+
static struct vduse_dev *vduse_dev_create(void)
{
struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1348,6 +1588,7 @@ static struct vduse_dev *vduse_dev_create(void)
mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
+ mutex_init(&dev->domain_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
@@ -1396,8 +1637,9 @@ static int vduse_destroy_dev(char *name)
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config);
- kfree(dev->vqs);
- vduse_domain_destroy(dev->domain);
+ vduse_dev_deinit_vqs(dev);
+ if (dev->domain)
+ vduse_domain_destroy(dev->domain);
kfree(dev->name);
vduse_dev_destroy(dev);
module_put(THIS_MODULE);
@@ -1476,8 +1718,48 @@ static ssize_t msg_timeout_store(struct device *device,
static DEVICE_ATTR_RW(msg_timeout);
+static ssize_t bounce_size_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+
+ return sysfs_emit(buf, "%u\n", dev->bounce_size);
+}
+
+static ssize_t bounce_size_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+ unsigned int bounce_size;
+ int ret;
+
+ ret = -EPERM;
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ goto unlock;
+
+ ret = kstrtouint(buf, 10, &bounce_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = -EINVAL;
+ if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
+ bounce_size < VDUSE_MIN_BOUNCE_SIZE)
+ goto unlock;
+
+ dev->bounce_size = bounce_size & PAGE_MASK;
+ ret = count;
+unlock:
+ mutex_unlock(&dev->domain_lock);
+ return ret;
+}
+
+static DEVICE_ATTR_RW(bounce_size);
+
static struct attribute *vduse_dev_attrs[] = {
&dev_attr_msg_timeout.attr,
+ &dev_attr_bounce_size.attr,
NULL
};
@@ -1486,7 +1768,7 @@ ATTRIBUTE_GROUPS(vduse_dev);
static int vduse_create_dev(struct vduse_dev_config *config,
void *config_buf, u64 api_version)
{
- int i, ret;
+ int ret;
struct vduse_dev *dev;
ret = -EEXIST;
@@ -1506,26 +1788,9 @@ static int vduse_create_dev(struct vduse_dev_config *config,
if (!dev->name)
goto err_str;
- dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
- VDUSE_BOUNCE_SIZE);
- if (!dev->domain)
- goto err_domain;
-
+ dev->bounce_size = VDUSE_BOUNCE_SIZE;
dev->config = config_buf;
dev->config_size = config->config_size;
- dev->vq_align = config->vq_align;
- dev->vq_num = config->vq_num;
- dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
- if (!dev->vqs)
- goto err_vqs;
-
- for (i = 0; i < dev->vq_num; i++) {
- dev->vqs[i].index = i;
- INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
- INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
- spin_lock_init(&dev->vqs[i].kick_lock);
- spin_lock_init(&dev->vqs[i].irq_lock);
- }
ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
if (ret < 0)
@@ -1540,16 +1805,19 @@ static int vduse_create_dev(struct vduse_dev_config *config,
ret = PTR_ERR(dev->dev);
goto err_dev;
}
+
+ ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
+ if (ret)
+ goto err_vqs;
+
__module_get(THIS_MODULE);
return 0;
+err_vqs:
+ device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- kfree(dev->vqs);
-err_vqs:
- vduse_domain_destroy(dev->domain);
-err_domain:
kfree(dev->name);
err_str:
vduse_dev_destroy(dev);
@@ -1716,9 +1984,23 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (ret)
return ret;
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain)
+ dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+ dev->bounce_size);
+ mutex_unlock(&dev->domain_lock);
+ if (!dev->domain) {
+ put_device(&dev->vdev->vdpa.dev);
+ return -ENOMEM;
+ }
+
ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
if (ret) {
put_device(&dev->vdev->vdpa.dev);
+ mutex_lock(&dev->domain_lock);
+ vduse_domain_destroy(dev->domain);
+ dev->domain = NULL;
+ mutex_unlock(&dev->domain_lock);
return ret;
}
@@ -1824,12 +2106,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1843,6 +2128,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1862,6 +2149,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);