// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020 Intel Corporation * Author: Johannes Berg */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "virt-pci.h" #define to_virtio_pcidev(_pdev) \ container_of(_pdev, struct virtio_pcidev_device, pdev) /* for MSI-X we have a 32-bit payload */ #define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) #define NUM_IRQ_MSGS 10 struct virtio_pcidev_message_buffer { struct virtio_pcidev_msg hdr; u8 data[8]; }; struct virtio_pcidev_device { struct um_pci_device pdev; struct virtio_device *vdev; struct virtqueue *cmd_vq, *irq_vq; #define VIRTIO_PCIDEV_WRITE_BUFS 20 struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS); #define UM_PCI_STAT_WAITING 0 unsigned long status; bool platform; }; static unsigned int virtio_pcidev_max_delay_us = 40000; module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644); static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted) { int i; for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { if (!test_and_set_bit(i, dev->used_bufs)) return i; } *posted = false; return VIRTIO_PCIDEV_WRITE_BUFS; } static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf) { int i; if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) { kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]); dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL; return; } for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { if (buf == &dev->bufs[i]) { kfree(dev->extra_ptrs[i]); dev->extra_ptrs[i] = NULL; WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); return; } } WARN_ON(1); } static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev, struct virtio_pcidev_msg *cmd, unsigned int cmd_size, const void *extra, unsigned int extra_size, void *out, unsigned int out_size) { struct scatterlist out_sg, extra_sg, in_sg; struct scatterlist *sgs_list[] = { [0] = &out_sg, [1] = extra ? &extra_sg : &in_sg, [2] = extra ? &in_sg : NULL, }; struct virtio_pcidev_message_buffer *buf; int delay_count = 0; bool bounce_out; int ret, len; int buf_idx; bool posted; if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) return -EINVAL; switch (cmd->op) { case VIRTIO_PCIDEV_OP_CFG_WRITE: case VIRTIO_PCIDEV_OP_MMIO_WRITE: case VIRTIO_PCIDEV_OP_MMIO_MEMSET: /* in PCI, writes are posted, so don't wait */ posted = !out; WARN_ON(!posted); break; default: posted = false; break; } bounce_out = !posted && cmd_size <= sizeof(*cmd) && out && out_size <= sizeof(buf->data); buf_idx = virtio_pcidev_get_buf(dev, &posted); buf = &dev->bufs[buf_idx]; memcpy(buf, cmd, cmd_size); if (posted && extra && extra_size > sizeof(buf) - cmd_size) { dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, GFP_ATOMIC); if (!dev->extra_ptrs[buf_idx]) { virtio_pcidev_free_buf(dev, buf); return -ENOMEM; } extra = dev->extra_ptrs[buf_idx]; } else if (extra && extra_size <= sizeof(buf) - cmd_size) { memcpy((u8 *)buf + cmd_size, extra, extra_size); cmd_size += extra_size; extra_size = 0; extra = NULL; cmd = (void *)buf; } else { cmd = (void *)buf; } sg_init_one(&out_sg, cmd, cmd_size); if (extra) sg_init_one(&extra_sg, extra, extra_size); /* allow stack for small buffers */ if (bounce_out) sg_init_one(&in_sg, buf->data, out_size); else if (out) sg_init_one(&in_sg, out, out_size); /* add to internal virtio queue */ ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, extra ? 2 : 1, out ? 1 : 0, cmd, GFP_ATOMIC); if (ret) { virtio_pcidev_free_buf(dev, buf); return ret; } if (posted) { virtqueue_kick(dev->cmd_vq); return 0; } /* kick and poll for getting a response on the queue */ set_bit(UM_PCI_STAT_WAITING, &dev->status); virtqueue_kick(dev->cmd_vq); ret = 0; while (1) { void *completed = virtqueue_get_buf(dev->cmd_vq, &len); if (completed == buf) break; if (completed) virtio_pcidev_free_buf(dev, completed); if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || ++delay_count > virtio_pcidev_max_delay_us, "um virt-pci delay: %d", delay_count)) { ret = -EIO; break; } udelay(1); } clear_bit(UM_PCI_STAT_WAITING, &dev->status); if (bounce_out) memcpy(out, buf->data, out_size); virtio_pcidev_free_buf(dev, buf); return ret; } static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev, unsigned int offset, int size) { struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); struct virtio_pcidev_msg hdr = { .op = VIRTIO_PCIDEV_OP_CFG_READ, .size = size, .addr = offset, }; /* max 8, we might not use it all */ u8 data[8]; memset(data, 0xff, sizeof(data)); /* size has been checked in um_pci_cfgspace_read() */ if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) return ULONG_MAX; switch (size) { case 1: return data[0]; case 2: return le16_to_cpup((void *)data); case 4: return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: return le64_to_cpup((void *)data); #endif default: return ULONG_MAX; } } static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev, unsigned int offset, int size, unsigned long val) { struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); struct { struct virtio_pcidev_msg hdr; /* maximum size - we may only use parts of it */ u8 data[8]; } msg = { .hdr = { .op = VIRTIO_PCIDEV_OP_CFG_WRITE, .size = size, .addr = offset, }, }; /* size has been checked in um_pci_cfgspace_write() */ switch (size) { case 1: msg.data[0] = (u8)val; break; case 2: put_unaligned_le16(val, (void *)msg.data); break; case 4: put_unaligned_le32(val, (void *)msg.data); break; #ifdef CONFIG_64BIT case 8: put_unaligned_le64(val, (void *)msg.data); break; #endif } WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); } static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev, int bar, void *buffer, unsigned int offset, int size) { struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); struct virtio_pcidev_msg hdr = { .op = VIRTIO_PCIDEV_OP_MMIO_READ, .bar = bar, .size = size, .addr = offset, }; memset(buffer, 0xff, size); virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); } static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar, unsigned int offset, int size) { /* 8 is maximum size - we may only use parts of it */ u8 data[8]; /* size has been checked in um_pci_bar_read() */ virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size); switch (size) { case 1: return data[0]; case 2: return le16_to_cpup((void *)data); case 4: return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: return le64_to_cpup((void *)data); #endif default: return ULONG_MAX; } } static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev, int bar, unsigned int offset, const void *buffer, int size) { struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); struct virtio_pcidev_msg hdr = { .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, .bar = bar, .size = size, .addr = offset, }; virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); } static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar, unsigned int offset, int size, unsigned long val) { /* maximum size - we may only use parts of it */ u8 data[8]; /* size has been checked in um_pci_bar_write() */ switch (size) { case 1: data[0] = (u8)val; break; case 2: put_unaligned_le16(val, (void *)data); break; case 4: put_unaligned_le32(val, (void *)data); break; #ifdef CONFIG_64BIT case 8: put_unaligned_le64(val, (void *)data); break; #endif } virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size); } static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar, unsigned int offset, u8 value, int size) { struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); struct { struct virtio_pcidev_msg hdr; u8 data; } msg = { .hdr = { .op = VIRTIO_PCIDEV_OP_CFG_WRITE, .bar = bar, .size = size, .addr = offset, }, .data = value, }; virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); } static const struct um_pci_ops virtio_pcidev_um_pci_ops = { .cfgspace_read = virtio_pcidev_cfgspace_read, .cfgspace_write = virtio_pcidev_cfgspace_write, .bar_read = virtio_pcidev_bar_read, .bar_write = virtio_pcidev_bar_write, .bar_copy_from = virtio_pcidev_bar_copy_from, .bar_copy_to = virtio_pcidev_bar_copy_to, .bar_set = virtio_pcidev_bar_set, }; static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) { struct scatterlist sg[1]; sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) kfree(buf); else if (kick) virtqueue_kick(vq); } static void virtio_pcidev_handle_irq_message(struct virtqueue *vq, struct virtio_pcidev_msg *msg) { struct virtio_device *vdev = vq->vdev; struct virtio_pcidev_device *dev = vdev->priv; if (!dev->pdev.irq) return; /* we should properly chain interrupts, but on ARCH=um we don't care */ switch (msg->op) { case VIRTIO_PCIDEV_OP_INT: generic_handle_irq(dev->pdev.irq); break; case VIRTIO_PCIDEV_OP_MSI: /* our MSI message is just the interrupt number */ if (msg->size == sizeof(u32)) generic_handle_irq(le32_to_cpup((void *)msg->data)); else generic_handle_irq(le16_to_cpup((void *)msg->data)); break; case VIRTIO_PCIDEV_OP_PME: /* nothing to do - we already woke up due to the message */ break; default: dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); break; } } static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq) { struct virtio_device *vdev = vq->vdev; struct virtio_pcidev_device *dev = vdev->priv; void *cmd; int len; if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) return; while ((cmd = virtqueue_get_buf(vq, &len))) virtio_pcidev_free_buf(dev, cmd); } static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq) { struct virtio_pcidev_msg *msg; int len; while ((msg = virtqueue_get_buf(vq, &len))) { if (len >= sizeof(*msg)) virtio_pcidev_handle_irq_message(vq, msg); /* recycle the message buffer */ virtio_pcidev_irq_vq_addbuf(vq, msg, true); } } static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev) { struct virtqueue_info vqs_info[] = { { "cmd", virtio_pcidev_cmd_vq_cb }, { "irq", virtio_pcidev_irq_vq_cb }, }; struct virtqueue *vqs[2]; int err, i; err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); if (err) return err; dev->cmd_vq = vqs[0]; dev->irq_vq = vqs[1]; virtio_device_ready(dev->vdev); for (i = 0; i < NUM_IRQ_MSGS; i++) { void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); if (msg) virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false); } virtqueue_kick(dev->irq_vq); return 0; } static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev, struct virtio_pcidev_device *dev) { um_pci_platform_device_unregister(&dev->pdev); virtio_reset_device(vdev); vdev->config->del_vqs(vdev); kfree(dev); } static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev, struct virtio_pcidev_device *dev) { int err; dev->platform = true; err = virtio_pcidev_init_vqs(dev); if (err) goto err_free; err = um_pci_platform_device_register(&dev->pdev); if (err) goto err_reset; err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); if (err) goto err_unregister; return 0; err_unregister: um_pci_platform_device_unregister(&dev->pdev); err_reset: virtio_reset_device(vdev); vdev->config->del_vqs(vdev); err_free: kfree(dev); return err; } static int virtio_pcidev_virtio_probe(struct virtio_device *vdev) { struct virtio_pcidev_device *dev; int err; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->vdev = vdev; vdev->priv = dev; dev->pdev.ops = &virtio_pcidev_um_pci_ops; if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) return virtio_pcidev_virtio_platform_probe(vdev, dev); err = virtio_pcidev_init_vqs(dev); if (err) goto err_free; err = um_pci_device_register(&dev->pdev); if (err) goto err_reset; device_set_wakeup_enable(&vdev->dev, true); /* * In order to do suspend-resume properly, don't allow VQs * to be suspended. */ virtio_uml_set_no_vq_suspend(vdev, true); return 0; err_reset: virtio_reset_device(vdev); vdev->config->del_vqs(vdev); err_free: kfree(dev); return err; } static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) { struct virtio_pcidev_device *dev = vdev->priv; if (dev->platform) { of_platform_depopulate(&vdev->dev); __virtio_pcidev_virtio_platform_remove(vdev, dev); return; } device_set_wakeup_enable(&vdev->dev, false); um_pci_device_unregister(&dev->pdev); /* Stop all virtqueues */ virtio_reset_device(vdev); dev->cmd_vq = NULL; dev->irq_vq = NULL; vdev->config->del_vqs(vdev); kfree(dev); } static struct virtio_device_id id_table[] = { { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, { 0 }, }; MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver virtio_pcidev_virtio_driver = { .driver.name = "virtio-pci", .id_table = id_table, .probe = virtio_pcidev_virtio_probe, .remove = virtio_pcidev_virtio_remove, }; static int __init virtio_pcidev_init(void) { if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, "No virtio device ID configured for PCI - no PCI support\n")) return 0; return register_virtio_driver(&virtio_pcidev_virtio_driver); } late_initcall(virtio_pcidev_init); static void __exit virtio_pcidev_exit(void) { unregister_virtio_driver(&virtio_pcidev_virtio_driver); } module_exit(virtio_pcidev_exit);