diff options
Diffstat (limited to 'arch/um/drivers')
31 files changed, 1236 insertions, 1168 deletions
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index b94b2618e7d8..9cb196070614 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -297,26 +297,6 @@ config UML_NET_MCAST If unsure, say N. -config UML_NET_PCAP - bool "pcap transport (obsolete)" - depends on UML_NET - depends on !MODVERSIONS - select MAY_HAVE_RUNTIME_DEPS - help - The pcap transport makes a pcap packet stream on the host look - like an ethernet device inside UML. This is useful for making - UML act as a network monitor for the host. You must have libcap - installed in order to build the pcap transport into UML. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable this option. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - config UML_NET_SLIRP bool "SLiRP transport (obsolete)" depends on UML_NET @@ -365,16 +345,20 @@ config UML_RTC by providing a fake RTC clock that causes a wakeup at the right time. -config UML_PCI_OVER_VIRTIO - bool "Enable PCI over VIRTIO device simulation" - # in theory, just VIRTIO is enough, but that causes recursion - depends on VIRTIO_UML +config UML_PCI + bool select FORCE_PCI select UML_IOMEM_EMULATION select UML_DMA_EMULATION select PCI_MSI select PCI_LOCKLESS_CONFIG +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select UML_PCI + config UML_PCI_OVER_VIRTIO_DEVICE_ID int "set the virtio device ID for PCI emulation" default -1 diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 0e6af81096fd..0a5820343ad3 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o rtc-objs := rtc_kern.o rtc_user.o -LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) - LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) -targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o - -$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o - $(LD) -r -dp -o $@ $^ $(ld_flags) +targets := vde_kern.o vde_user.o $(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o $(LD) -r -dp -o $@ $^ $(ld_flags) @@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o -obj-$(CONFIG_UML_NET_PCAP) += pcap.o obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_MCONSOLE) += mconsole.o obj-$(CONFIG_MMAPPER) += mmapper_kern.o @@ -66,10 +60,11 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o obj-$(CONFIG_UML_RTC) += rtc.o -obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o +obj-$(CONFIG_UML_PCI) += virt-pci.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o # pcap_user.o must be added explicitly. -USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h index e14b9cdf7a33..5a61db512ffb 100644 --- a/arch/um/drivers/chan.h +++ b/arch/um/drivers/chan.h @@ -22,7 +22,8 @@ struct chan { unsigned int output:1; unsigned int opened:1; unsigned int enabled:1; - int fd; + int fd_in; + int fd_out; /* only different to fd_in if blocking output is needed */ const struct chan_ops *ops; void *data; }; diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 37538b4168da..e78a99816c86 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = { }; #endif /* CONFIG_NOCONFIG_CHAN */ +static inline bool need_output_blocking(void) +{ + return time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL; +} + static int open_one_chan(struct chan *chan) { int fd, err; @@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan) return fd; err = os_set_fd_block(fd, 0); - if (err) { - (*chan->ops->close)(fd, chan->data); - return err; - } + if (err) + goto out_close; + + chan->fd_in = fd; + chan->fd_out = fd; + + /* + * In time-travel modes infinite-CPU and external we need to guarantee + * that any writes to the output succeed immdiately from the point of + * the VM. The best way to do this is to put the FD in blocking mode + * and simply wait/retry until everything is written. + * As every write is guaranteed to complete, we also do not need to + * request an IRQ for the output. + * + * Note that input cannot happen in a time synchronized way. We permit + * it, but time passes very quickly if anything waits for a read. + */ + if (chan->output && need_output_blocking()) { + err = os_dup_file(chan->fd_out); + if (err < 0) + goto out_close; - chan->fd = fd; + chan->fd_out = err; + + err = os_set_fd_block(chan->fd_out, 1); + if (err) { + os_close_file(chan->fd_out); + goto out_close; + } + } chan->opened = 1; return 0; + +out_close: + (*chan->ops->close)(fd, chan->data); + return err; } static int open_chan(struct list_head *chans) @@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans) void chan_enable_winch(struct chan *chan, struct tty_port *port) { if (chan && chan->primary && chan->ops->winch) - register_winch(chan->fd, port); + register_winch(chan->fd_in, port); } static void line_timer_cb(struct work_struct *work) @@ -156,8 +190,9 @@ int enable_chan(struct line *line) if (chan->enabled) continue; - err = line_setup_irq(chan->fd, chan->input, chan->output, line, - chan); + err = line_setup_irq(chan->fd_in, chan->input, + chan->output && !need_output_blocking(), + line, chan); if (err) goto out_close; @@ -196,7 +231,8 @@ void free_irqs(void) if (chan->input && chan->enabled) um_free_irq(chan->line->read_irq, chan); - if (chan->output && chan->enabled) + if (chan->output && chan->enabled && + !need_output_blocking()) um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } @@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq) } else { if (chan->input && chan->enabled) um_free_irq(chan->line->read_irq, chan); - if (chan->output && chan->enabled) + if (chan->output && chan->enabled && + !need_output_blocking()) um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } + if (chan->fd_out != chan->fd_in) + os_close_file(chan->fd_out); if (chan->ops->close != NULL) - (*chan->ops->close)(chan->fd, chan->data); + (*chan->ops->close)(chan->fd_in, chan->data); chan->opened = 0; - chan->fd = -1; + chan->fd_in = -1; + chan->fd_out = -1; } void close_chan(struct line *line) @@ -244,7 +284,7 @@ void close_chan(struct line *line) void deactivate_chan(struct chan *chan, int irq) { if (chan && chan->enabled) - deactivate_fd(chan->fd, irq); + deactivate_fd(chan->fd_in, irq); } int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) @@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) if (len == 0 || !chan || !chan->ops->write) return 0; - n = chan->ops->write(chan->fd, buf, len, chan->data); + n = chan->ops->write(chan->fd_out, buf, len, chan->data); if (chan->primary) { ret = n; } @@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len) if (!chan || !chan->ops->console_write) return 0; - n = chan->ops->console_write(chan->fd, buf, len); + n = chan->ops->console_write(chan->fd_out, buf, len); if (chan->primary) ret = n; return ret; @@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out, if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } chan = line->chan_out; if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } return 0; @@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan) (*chan->ops->free)(chan->data); if (chan->primary && chan->output) - ignore_sigio_fd(chan->fd); + ignore_sigio_fd(chan->fd_in); kfree(chan); } @@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device, .output = 0, .opened = 0, .enabled = 0, - .fd = -1, + .fd_in = -1, + .fd_out = -1, .ops = ops, .data = data }); return chan; @@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq) schedule_delayed_work(&line->task, 1); goto out; } - err = chan->ops->read(chan->fd, &c, chan->data); + err = chan->ops->read(chan->fd_in, &c, chan->data); if (err > 0) tty_insert_flip_char(port, c, TTY_NORMAL); } while (err > 0); diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index ec04e47b9d79..35f9beeb19b3 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused) { int n; - n = read(fd, c_out, sizeof(*c_out)); + CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out))); if (n > 0) return n; else if (n == 0) @@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused) int generic_write(int fd, const __u8 *buf, size_t n, void *unused) { + int written = 0; int err; - err = write(fd, buf, n); - if (err > 0) - return err; + /* The FD may be in blocking mode, as such, need to retry short writes, + * they may have been interrupted by a signal. + */ + do { + errno = 0; + err = write(fd, buf + written, n - written); + if (err > 0) { + written += err; + continue; + } + } while (err < 0 && errno == EINTR); + + if (written > 0) + return written; else if (errno == EAGAIN) return 0; else if (err == 0) @@ -149,6 +161,8 @@ static __noreturn int winch_thread(void *arg) int count; char c = 1; + os_set_pdeathsig(); + pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; count = write(pipe_fd, &c, sizeof(c)); diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 60d1c6cab8a9..819aabb4ecdc 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -49,6 +49,7 @@ #include "mconsole.h" #include "harddog.h" +MODULE_DESCRIPTION("UML hardware watchdog"); MODULE_LICENSE("GPL"); static DEFINE_MUTEX(harddog_mutex); @@ -163,7 +164,6 @@ static const struct file_operations harddog_fops = { .compat_ioctl = compat_ptr_ioctl, .open = harddog_open, .release = harddog_release, - .llseek = no_llseek, }; static struct miscdevice harddog_miscdev = { diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index c42b793bce65..0ac149de1ac0 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); #ifndef MODULE static int set_dsp(char *name, int *add) { + *add = 0; dsp = name; return 0; } @@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); static int set_mixer(char *name, int *add) { + *add = 0; mixer = name; return 0; } @@ -291,7 +293,6 @@ static int hostmixer_release(struct inode *inode, struct file *file) static const struct file_operations hostaudio_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = hostaudio_read, .write = hostaudio_write, .poll = hostaudio_poll, @@ -304,7 +305,6 @@ static const struct file_operations hostaudio_fops = { static const struct file_operations hostmixer_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = hostmixer_ioctl_mixdev, .open = hostmixer_open_mixdev, .release = hostmixer_release, diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index ffc5cb92fa36..43d8959cc746 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init, parse_chan_pair(NULL, line, n, opts, error_out); err = 0; } + *error_out = "configured as 'none'"; } else { char *new = kstrdup(init, GFP_KERNEL); if (!new) { @@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init, } } if (err) { + *error_out = "failed to parse channel pair"; line->init_str = NULL; line->valid = 0; kfree(new); @@ -676,24 +678,26 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, goto cleanup; } - *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), - .fd = fd, + *winch = ((struct winch) { .fd = fd, .tty_fd = tty_fd, .pid = pid, .port = port, .stack = stack }); + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, IRQF_SHARED, "winch", winch) < 0) { printk(KERN_ERR "register_winch_irq - failed to register " "IRQ\n"); + spin_lock(&winch_handler_lock); + list_del(&winch->list); + spin_unlock(&winch_handler_lock); goto out_free; } - spin_lock(&winch_handler_lock); - list_add(&winch->list, &winch_handlers); - spin_unlock(&winch_handler_lock); - return; out_free: diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index e24298a734be..a04cd13c6315 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) return NULL; } +#ifndef MIN #define MIN(a,b) ((a)<(b) ? (a):(b)) +#endif #define STRINGX(x) #x #define STRING(x) STRINGX(x) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 77c4afb8ab90..d5a9c5aabaec 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -336,7 +336,7 @@ static struct platform_driver uml_net_driver = { static void net_device_release(struct device *dev) { - struct uml_net *device = dev_get_drvdata(dev); + struct uml_net *device = container_of(dev, struct uml_net, pdev.dev); struct net_device *netdev = device->dev; struct uml_net_private *lp = netdev_priv(netdev); @@ -636,10 +636,7 @@ static int __init eth_setup(char *str) return 1; } - new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); - if (!new) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(*new)); + new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->index = n; diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c deleted file mode 100644 index 25ee2c97ca21..000000000000 --- a/arch/um/drivers/pcap_kern.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "pcap_user.h" - -struct pcap_init { - char *host_if; - int promisc; - int optimize; - char *filter; -}; - -void pcap_init_kern(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct pcap_data *ppri; - struct pcap_init *init = data; - - pri = netdev_priv(dev); - ppri = (struct pcap_data *) pri->user; - ppri->host_if = init->host_if; - ppri->promisc = init->promisc; - ppri->optimize = init->optimize; - ppri->filter = init->filter; - - printk("pcap backend, host interface %s\n", ppri->host_if); -} - -static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return pcap_user_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER, - (struct pcap_data *) &lp->user); -} - -static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return -EPERM; -} - -static const struct net_kern_info pcap_kern_info = { - .init = pcap_init_kern, - .protocol = eth_protocol, - .read = pcap_read, - .write = pcap_write, -}; - -int pcap_setup(char *str, char **mac_out, void *data) -{ - struct pcap_init *init = data; - char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; - int i; - - *init = ((struct pcap_init) - { .host_if = "eth0", - .promisc = 1, - .optimize = 0, - .filter = NULL }); - - remain = split_if_spec(str, &host_if, &init->filter, - &options[0], &options[1], mac_out, NULL); - if (remain != NULL) { - printk(KERN_ERR "pcap_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (host_if != NULL) - init->host_if = host_if; - - for (i = 0; i < ARRAY_SIZE(options); i++) { - if (options[i] == NULL) - continue; - if (!strcmp(options[i], "promisc")) - init->promisc = 1; - else if (!strcmp(options[i], "nopromisc")) - init->promisc = 0; - else if (!strcmp(options[i], "optimize")) - init->optimize = 1; - else if (!strcmp(options[i], "nooptimize")) - init->optimize = 0; - else { - printk(KERN_ERR "pcap_setup : bad option - '%s'\n", - options[i]); - return 0; - } - } - - return 1; -} - -static struct transport pcap_transport = { - .list = LIST_HEAD_INIT(pcap_transport.list), - .name = "pcap", - .setup = pcap_setup, - .user = &pcap_user_info, - .kern = &pcap_kern_info, - .private_size = sizeof(struct pcap_data), - .setup_size = sizeof(struct pcap_init), -}; - -static int register_pcap(void) -{ - register_transport(&pcap_transport); - return 0; -} - -late_initcall(register_pcap); diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c deleted file mode 100644 index 52ddda3e3b10..000000000000 --- a/arch/um/drivers/pcap_user.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <errno.h> -#include <pcap.h> -#include <string.h> -#include <asm/types.h> -#include <net_user.h> -#include "pcap_user.h" -#include <um_malloc.h> - -#define PCAP_FD(p) (*(int *)(p)) - -static int pcap_user_init(void *data, void *dev) -{ - struct pcap_data *pri = data; - pcap_t *p; - char errors[PCAP_ERRBUF_SIZE]; - - p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER, - pri->promisc, 0, errors); - if (p == NULL) { - printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - " - "'%s'\n", errors); - return -EINVAL; - } - - pri->dev = dev; - pri->pcap = p; - return 0; -} - -static int pcap_user_open(void *data) -{ - struct pcap_data *pri = data; - __u32 netmask; - int err; - - if (pri->pcap == NULL) - return -ENODEV; - - if (pri->filter != NULL) { - err = dev_netmask(pri->dev, &netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n"); - return -EIO; - } - - pri->compiled = uml_kmalloc(sizeof(struct bpf_program), - UM_GFP_KERNEL); - if (pri->compiled == NULL) { - printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n"); - return -ENOMEM; - } - - err = pcap_compile(pri->pcap, - (struct bpf_program *) pri->compiled, - pri->filter, pri->optimize, netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - " - "'%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - - err = pcap_setfilter(pri->pcap, pri->compiled); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter " - "failed - '%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - } - - return PCAP_FD(pri->pcap); - - out: - kfree(pri->compiled); - return -EIO; -} - -static void pcap_remove(void *data) -{ - struct pcap_data *pri = data; - - if (pri->compiled != NULL) - pcap_freecode(pri->compiled); - - if (pri->pcap != NULL) - pcap_close(pri->pcap); -} - -struct pcap_handler_data { - char *buffer; - int len; -}; - -static void handler(u_char *data, const struct pcap_pkthdr *header, - const u_char *packet) -{ - int len; - - struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; - - len = hdata->len < header->caplen ? hdata->len : header->caplen; - memcpy(hdata->buffer, packet, len); - hdata->len = len; -} - -int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -{ - struct pcap_handler_data hdata = ((struct pcap_handler_data) - { .buffer = buffer, - .len = len }); - int n; - - n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); - if (n < 0) { - printk(UM_KERN_ERR "pcap_dispatch failed - %s\n", - pcap_geterr(pri->pcap)); - return -EIO; - } - else if (n == 0) - return 0; - return hdata.len; -} - -const struct net_user_info pcap_user_info = { - .init = pcap_user_init, - .open = pcap_user_open, - .close = NULL, - .remove = pcap_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h deleted file mode 100644 index 216246f5f09b..000000000000 --- a/arch/um/drivers/pcap_user.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - */ - -#include <net_user.h> - -struct pcap_data { - char *host_if; - int promisc; - int optimize; - char *filter; - void *compiled; - void *pcap; - void *dev; -}; - -extern const struct net_user_info pcap_user_info; - -extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); - diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index c52b3ff3c092..a4508470df78 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -45,15 +45,17 @@ struct connection { static irqreturn_t pipe_interrupt(int irq, void *data) { struct connection *conn = data; - int fd; + int n_fds = 1, fd = -1; + ssize_t ret; - fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); - if (fd < 0) { - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid, + sizeof(conn->helper_pid)); + if (ret != sizeof(conn->helper_pid)) { + if (ret == -EAGAIN) return IRQ_NONE; - printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", - -fd); + printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n", + ret); os_close_file(conn->fd); } diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index da985e0dc69a..ca08c91f47a3 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -79,7 +79,7 @@ static int __init rng_init (void) if (err < 0) goto err_out_cleanup_hw; - sigio_broken(random_fd); + sigio_broken(); hwrng.name = RNG_MODULE_NAME; hwrng.read = rng_dev_read; diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 97ceb205cfe6..9158c936c128 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -51,6 +51,7 @@ static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { + struct timespec64 ts; unsigned long long secs; if (!enable && !uml_rtc_alarm_enabled) @@ -58,7 +59,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) uml_rtc_alarm_enabled = enable; - secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + read_persistent_clock64(&ts); + secs = uml_rtc_alarm_time - ts.tv_sec; if (time_travel_mode == TT_MODE_OFF) { if (!enable) { @@ -73,7 +75,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) if (enable) time_travel_add_event_rel(¨_rtc_alarm_event, - secs * NSEC_PER_SEC); + secs * NSEC_PER_SEC - + ts.tv_nsec); } return 0; @@ -168,11 +171,10 @@ cleanup: return err; } -static int uml_rtc_remove(struct platform_device *pdev) +static void uml_rtc_remove(struct platform_device *pdev) { device_init_wakeup(&pdev->dev, 0); uml_rtc_cleanup(); - return 0; } static struct platform_driver uml_rtc_driver = { diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c index 7c3cec4c68cf..51e79f3148cd 100644 --- a/arch/um/drivers/rtc_user.c +++ b/arch/um/drivers/rtc_user.c @@ -39,7 +39,7 @@ int uml_rtc_start(bool timetravel) } /* apparently timerfd won't send SIGIO, use workaround */ - sigio_broken(uml_rtc_irq_fds[0]); + sigio_broken(); err = add_sigio_fd(uml_rtc_irq_fds[0]); if (err < 0) { close(uml_rtc_irq_fds[0]); diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c index 8f633e2e5f3d..97228aa080cb 100644 --- a/arch/um/drivers/slirp_user.c +++ b/arch/um/drivers/slirp_user.c @@ -49,7 +49,7 @@ static int slirp_tramp(char **argv, int fd) static int slirp_open(void *data) { struct slirp_data *pri = data; - int fds[2], pid, err; + int fds[2], err; err = os_pipe(fds, 1, 1); if (err) @@ -60,7 +60,6 @@ static int slirp_open(void *data) printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); goto out; } - pid = err; pri->slave = fds[1]; pri->slip.pos = 0; diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h index f016fe15499f..2985c14661f4 100644 --- a/arch/um/drivers/ubd.h +++ b/arch/um/drivers/ubd.h @@ -7,8 +7,10 @@ #ifndef __UM_UBD_USER_H #define __UM_UBD_USER_H -extern int start_io_thread(unsigned long sp, int *fds_out); -extern int io_thread(void *arg); +#include <os.h> + +int start_io_thread(struct os_helper_thread **td_out, int *fd_out); +void *io_thread(void *arg); extern int kernel_fd; extern int ubd_read_poll(int timeout); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 63fc062add70..4de6613e7468 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -36,7 +36,6 @@ #include <linux/vmalloc.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> -#include <asm/tlbflush.h> #include <kern_util.h> #include "mconsole_kern.h" #include <init.h> @@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) #define DRIVER_NAME "uml-blkdev" static DEFINE_MUTEX(ubd_lock); -static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); @@ -447,53 +445,41 @@ static int bulk_req_safe_read( return n; } -/* Called without dev->lock held, and only in interrupt context. */ -static void ubd_handler(void) +static void ubd_end_request(struct io_thread_req *io_req) { - int n; - int count; - - while(1){ - n = bulk_req_safe_read( - thread_fd, - irq_req_buffer, - &irq_remainder, - &irq_remainder_size, - UBD_REQ_BUFFER_SIZE - ); - if (n < 0) { - if(n == -EAGAIN) - break; - printk(KERN_ERR "spurious interrupt in ubd_handler, " - "err = %d\n", -n); - return; - } - for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - struct io_thread_req *io_req = (*irq_req_buffer)[count]; - - if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { - blk_queue_max_discard_sectors(io_req->req->q, 0); - blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); - } - blk_mq_end_request(io_req->req, io_req->error); - kfree(io_req); - } + if (io_req->error == BLK_STS_NOTSUPP) { + if (req_op(io_req->req) == REQ_OP_DISCARD) + blk_queue_disable_discard(io_req->req->q); + else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES) + blk_queue_disable_write_zeroes(io_req->req->q); } + blk_mq_end_request(io_req->req, io_req->error); + kfree(io_req); } static irqreturn_t ubd_intr(int irq, void *dev) { - ubd_handler(); + int len, i; + + while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer, + &irq_remainder, &irq_remainder_size, + UBD_REQ_BUFFER_SIZE)) >= 0) { + for (i = 0; i < len / sizeof(struct io_thread_req *); i++) + ubd_end_request((*irq_req_buffer)[i]); + } + + if (len < 0 && len != -EAGAIN) + pr_err("spurious interrupt in %s, err = %d\n", __func__, len); return IRQ_HANDLED; } /* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; +static struct os_helper_thread *io_td; static void kill_io_thread(void) { - if(io_pid != -1) - os_kill_process(io_pid, 1); + if (io_td) + os_kill_helper_thread(io_td); } __uml_exitcall(kill_io_thread); @@ -771,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev) printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); goto error; } - flush_tlb_kernel_vm(); err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset, @@ -794,7 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) static void ubd_device_release(struct device *dev) { - struct ubd *ubd_dev = dev_get_drvdata(dev); + struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); @@ -847,6 +832,7 @@ static int ubd_add(int n, char **error_out) struct queue_limits lim = { .max_segments = MAX_SG, .seg_boundary_mask = PAGE_SIZE - 1, + .features = BLK_FEAT_WRITE_CACHE, }; struct gendisk *disk; int err = 0; @@ -879,7 +865,6 @@ static int ubd_add(int n, char **error_out) ubd_dev->tag_set.ops = &ubd_mq_ops; ubd_dev->tag_set.queue_depth = 64; ubd_dev->tag_set.numa_node = NUMA_NO_NODE; - ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ubd_dev->tag_set.driver_data = ubd_dev; ubd_dev->tag_set.nr_hw_queues = 1; @@ -893,8 +878,6 @@ static int ubd_add(int n, char **error_out) goto out_cleanup_tags; } - blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); - blk_queue_write_cache(disk->queue, true, false); disk->major = UBD_MAJOR; disk->first_minor = n << UBD_SHIFT; disk->minors = 1 << UBD_SHIFT; @@ -914,6 +897,8 @@ static int ubd_add(int n, char **error_out) if (err) goto out_cleanup_disk; + ubd_dev->disk = disk; + return 0; out_cleanup_disk: @@ -1092,7 +1077,7 @@ static int __init ubd_init(void) if (irq_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), @@ -1103,7 +1088,7 @@ static int __init ubd_init(void) if (io_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } platform_driver_register(&ubd_driver); mutex_lock(&ubd_lock); @@ -1119,8 +1104,8 @@ static int __init ubd_init(void) late_initcall(ubd_init); -static int __init ubd_driver_init(void){ - unsigned long stack; +static int __init ubd_driver_init(void) +{ int err; /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ @@ -1129,13 +1114,11 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); - if(io_pid < 0){ + err = start_io_thread(&io_td, &thread_fd); + if (err < 0) { printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; + "falling back to synchronous I/O\n", -err); return 0; } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, @@ -1511,11 +1494,11 @@ int kernel_fd = -1; /* Only changed by the io thread. XXX: currently unused. */ static int io_count; -int io_thread(void *arg) +void *io_thread(void *arg) { int n, count, written, res; - os_fix_helper_signals(); + os_fix_helper_thread_signals(); while(1){ n = bulk_req_safe_read( @@ -1557,5 +1540,5 @@ int io_thread(void *arg) } while (written < n); } - return 0; + return NULL; } diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index a1afe414ce48..c5e6545f6fcf 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -23,11 +23,11 @@ #include <os.h> #include <poll.h> -struct pollfd kernel_pollfd; +static struct pollfd kernel_pollfd; -int start_io_thread(unsigned long sp, int *fd_out) +int start_io_thread(struct os_helper_thread **td_out, int *fd_out) { - int pid, fds[2], err; + int fds[2], err; err = os_pipe(fds, 1, 1); if(err < 0){ @@ -47,14 +47,14 @@ int start_io_thread(unsigned long sp, int *fd_out) goto out_close; } - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); - if(pid < 0){ - err = -errno; - printk("start_io_thread - clone failed : errno = %d\n", errno); + err = os_run_helper_thread(td_out, io_thread, NULL); + if (err < 0) { + printk("%s - failed to run helper thread, err = %d\n", + __func__, -err); goto out_close; } - return(pid); + return 0; out_close: os_close_file(fds[0]); diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index dc2feae789cb..b97bb52dd562 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/firmware.h> #include <linux/fs.h> +#include <asm/atomic.h> #include <uapi/linux/filter.h> #include <init.h> #include <irq_kern.h> @@ -102,18 +103,33 @@ static const struct { static void vector_reset_stats(struct vector_private *vp) { + /* We reuse the existing queue locks for stats */ + + /* RX stats are modified with RX head_lock held + * in vector_poll. + */ + + spin_lock(&vp->rx_queue->head_lock); vp->estats.rx_queue_max = 0; vp->estats.rx_queue_running_average = 0; - vp->estats.tx_queue_max = 0; - vp->estats.tx_queue_running_average = 0; vp->estats.rx_encaps_errors = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; + spin_unlock(&vp->rx_queue->head_lock); + + /* TX stats are modified with TX head_lock held + * in vector_send. + */ + + spin_lock(&vp->tx_queue->head_lock); vp->estats.tx_timeout_count = 0; vp->estats.tx_restart_queue = 0; vp->estats.tx_kicks = 0; vp->estats.tx_flow_control_xon = 0; vp->estats.tx_flow_control_xoff = 0; - vp->estats.sg_ok = 0; - vp->estats.sg_linearized = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + spin_unlock(&vp->tx_queue->head_lock); } static int get_mtu(struct arglist *def) @@ -141,7 +157,7 @@ static bool get_bpf_flash(struct arglist *def) if (allow != NULL) { if (kstrtoul(allow, 10, &result) == 0) - return (allow > 0); + return result > 0; } return false; } @@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def) static char *drop_buffer; -/* Array backed queues optimized for bulk enqueue/dequeue and - * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. - * For more details and full design rationale see - * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt - */ - /* * Advance the mmsg queue head by n = advance. Resets the queue to @@ -247,27 +257,13 @@ static char *drop_buffer; static int vector_advancehead(struct vector_queue *qi, int advance) { - int queue_depth; - qi->head = (qi->head + advance) % qi->max_depth; - spin_lock(&qi->tail_lock); - qi->queue_depth -= advance; - - /* we are at 0, use this to - * reset head and tail so we can use max size vectors - */ - - if (qi->queue_depth == 0) { - qi->head = 0; - qi->tail = 0; - } - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - return queue_depth; + atomic_sub(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } /* Advance the queue tail by n = advance. @@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance) static int vector_advancetail(struct vector_queue *qi, int advance) { - int queue_depth; - qi->tail = (qi->tail + advance) % qi->max_depth; - spin_lock(&qi->head_lock); - qi->queue_depth += advance; - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); - return queue_depth; + atomic_add(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } static int prep_msg(struct vector_private *vp, @@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) int iov_count; spin_lock(&qi->tail_lock); - spin_lock(&qi->head_lock); - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); + queue_depth = atomic_read(&qi->queue_depth); if (skb) packet_len = skb->len; @@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) mmsg_vector->msg_hdr.msg_iovlen = iov_count; mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; + wmb(); /* Make the packet visible to the NAPI poll thread */ queue_depth = vector_advancetail(qi, 1); } else goto drop; @@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count) } /* - * Generic vector deque via sendmmsg with support for forming headers + * Generic vector dequeue via sendmmsg with support for forming headers * using transport specific callback. Allows GRE, L2TPv3, RAW and * other transports to use a common dequeue procedure in vector mode */ @@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *send_from; - int result = 0, send_len, queue_depth = qi->max_depth; + int result = 0, send_len; if (spin_trylock(&qi->head_lock)) { - if (spin_trylock(&qi->tail_lock)) { - /* update queue_depth to current value */ - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - while (queue_depth > 0) { - /* Calculate the start of the vector */ - send_len = queue_depth; - send_from = qi->mmsg_vector; - send_from += qi->head; - /* Adjust vector size if wraparound */ - if (send_len + qi->head > qi->max_depth) - send_len = qi->max_depth - qi->head; - /* Try to TX as many packets as possible */ - if (send_len > 0) { - result = uml_vector_sendmmsg( - vp->fds->tx_fd, - send_from, - send_len, - 0 - ); - vp->in_write_poll = - (result != send_len); - } - /* For some of the sendmmsg error scenarios - * we may end being unsure in the TX success - * for all packets. It is safer to declare - * them all TX-ed and blame the network. - */ - if (result < 0) { - if (net_ratelimit()) - netdev_err(vp->dev, "sendmmsg err=%i\n", - result); - vp->in_error = true; - result = send_len; - } - if (result > 0) { - queue_depth = - consume_vector_skbs(qi, result); - /* This is equivalent to an TX IRQ. - * Restart the upper layers to feed us - * more packets. - */ - if (result > vp->estats.tx_queue_max) - vp->estats.tx_queue_max = result; - vp->estats.tx_queue_running_average = - (vp->estats.tx_queue_running_average + result) >> 1; - } - netif_wake_queue(qi->dev); - /* if TX is busy, break out of the send loop, - * poll write IRQ will reschedule xmit for us + /* update queue_depth to current value */ + while (atomic_read(&qi->queue_depth) > 0) { + /* Calculate the start of the vector */ + send_len = atomic_read(&qi->queue_depth); + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end being unsure in the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + vp->in_error = true; + result = send_len; + } + if (result > 0) { + consume_vector_skbs(qi, result); + /* This is equivalent to an TX IRQ. + * Restart the upper layers to feed us + * more packets. */ - if (result != send_len) { - vp->estats.tx_restart_queue++; - break; - } + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us. + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; } } spin_unlock(&qi->head_lock); } - return queue_depth; + return atomic_read(&qi->queue_depth); } /* Queue destructor. Deliberately stateless so we can use @@ -589,7 +574,7 @@ static struct vector_queue *create_queue( } spin_lock_init(&result->head_lock); spin_lock_init(&result->tail_lock); - result->queue_depth = 0; + atomic_set(&result->queue_depth, 0); result->head = 0; result->tail = 0; return result; @@ -668,18 +653,27 @@ done: } -/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */ static void prep_queue_for_rx(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *mmsg_vector = qi->mmsg_vector; void **skbuff_vector = qi->skbuff_vector; - int i; + int i, queue_depth; + + queue_depth = atomic_read(&qi->queue_depth); - if (qi->queue_depth == 0) + if (queue_depth == 0) return; - for (i = 0; i < qi->queue_depth; i++) { + + /* RX is always emptied 100% during each cycle, so we do not + * have to do the tail wraparound math for it. + */ + + qi->head = qi->tail = 0; + + for (i = 0; i < queue_depth; i++) { /* it is OK if allocation fails - recvmmsg with NULL data in * iov argument still performs an RX, just drops the packet * This allows us stop faffing around with a "drop buffer" @@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi) skbuff_vector++; mmsg_vector++; } - qi->queue_depth = 0; + atomic_set(&qi->queue_depth, 0); } static struct vector_device *find_device(int n) @@ -712,11 +706,9 @@ static struct vector_device *find_device(int n) static int vector_parse(char *str, int *index_out, char **str_out, char **error_out) { - int n, len, err; + int n, err; char *start = str; - len = strlen(str); - while ((*str != ':') && (strlen(str) > 1)) str++; if (*str != ':') { @@ -823,7 +815,8 @@ static struct platform_driver uml_net_driver = { static void vector_device_release(struct device *dev) { - struct vector_device *device = dev_get_drvdata(dev); + struct vector_device *device = + container_of(dev, struct vector_device, pdev.dev); struct net_device *netdev = device->dev; list_del(&device->list); @@ -974,7 +967,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) budget = qi->max_depth; packet_count = uml_vector_recvmmsg( - vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + vp->fds->rx_fd, qi->mmsg_vector, budget, 0); if (packet_count < 0) vp->in_error = true; @@ -987,7 +980,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) * many do we need to prep the next time prep_queue_for_rx() is called. */ - qi->queue_depth = packet_count; + atomic_add(packet_count, &qi->queue_depth); for (i = 0; i < packet_count; i++) { skb = (*skbuff_vector); @@ -1117,10 +1110,11 @@ static int irq_rr; static int vector_net_close(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; netif_stop_queue(dev); - del_timer(&vp->tl); + timer_delete(&vp->tl); + + vp->opened = false; if (vp->fds == NULL) return 0; @@ -1160,10 +1154,7 @@ static int vector_net_close(struct net_device *dev) destroy_queue(vp->tx_queue); kfree(vp->fds); vp->fds = NULL; - spin_lock_irqsave(&vp->lock, flags); - vp->opened = false; vp->in_error = false; - spin_unlock_irqrestore(&vp->lock, flags); return 0; } @@ -1176,6 +1167,7 @@ static int vector_poll(struct napi_struct *napi, int budget) if ((vp->options & VECTOR_TX) != 0) tx_enqueued = (vector_send(vp->tx_queue) > 0); + spin_lock(&vp->rx_queue->head_lock); if ((vp->options & VECTOR_RX) > 0) err = vector_mmsg_rx(vp, budget); else { @@ -1183,12 +1175,13 @@ static int vector_poll(struct napi_struct *napi, int budget) if (err > 0) err = 1; } + spin_unlock(&vp->rx_queue->head_lock); if (err > 0) work_done += err; if (tx_enqueued || err > 0) napi_schedule(napi); - if (work_done < budget) + if (work_done <= budget) napi_complete_done(napi, work_done); return work_done; } @@ -1205,17 +1198,12 @@ static void vector_reset_tx(struct work_struct *work) static int vector_net_open(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; int err = -EINVAL; struct vector_device *vdevice; - spin_lock_irqsave(&vp->lock, flags); - if (vp->opened) { - spin_unlock_irqrestore(&vp->lock, flags); + if (vp->opened) return -ENXIO; - } vp->opened = true; - spin_unlock_irqrestore(&vp->lock, flags); vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed)); @@ -1234,7 +1222,7 @@ static int vector_net_open(struct net_device *dev) vp->rx_header_size, MAX_IOV_SIZE ); - vp->rx_queue->queue_depth = get_depth(vp->parsed); + atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed)); } else { vp->header_rxbuffer = kmalloc( vp->rx_header_size, @@ -1389,8 +1377,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev, return -1; } - spin_lock(&vp->lock); - if (vp->bpf != NULL) { if (vp->opened) uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); @@ -1419,15 +1405,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev, if (vp->opened) result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); - spin_unlock(&vp->lock); - return result; free_buffer: release_firmware(fw); flash_fail: - spin_unlock(&vp->lock); if (vp->bpf != NULL) kfree(vp->bpf->filter); kfree(vp->bpf); @@ -1481,7 +1464,17 @@ static void vector_get_ethtool_stats(struct net_device *dev, { struct vector_private *vp = netdev_priv(dev); + /* Stats are modified in the dequeue portions of + * rx/tx which are protected by the head locks + * grabbing these locks here ensures they are up + * to date. + */ + + spin_lock(&vp->tx_queue->head_lock); + spin_lock(&vp->rx_queue->head_lock); memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); + spin_unlock(&vp->rx_queue->head_lock); + spin_unlock(&vp->tx_queue->head_lock); } static int vector_get_coalesce(struct net_device *netdev, @@ -1633,7 +1626,6 @@ static void vector_eth_configure( INIT_WORK(&vp->reset_tx, vector_reset_tx); timer_setup(&vp->tl, vector_timer_expire, 0); - spin_lock_init(&vp->lock); /* FIXME */ dev->netdev_ops = &vector_netdev_ops; @@ -1702,10 +1694,7 @@ static int __init vector_setup(char *str) str, error); return 1; } - new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); - if (!new) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(*new)); + new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->unit = n; new->arguments = str; diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 2a1fa8e0f3e1..417834793658 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -14,6 +14,7 @@ #include <linux/ctype.h> #include <linux/workqueue.h> #include <linux/interrupt.h> +#include <asm/atomic.h> #include "vector_user.h" @@ -44,7 +45,8 @@ struct vector_queue { struct net_device *dev; spinlock_t head_lock; spinlock_t tail_lock; - int queue_depth, head, tail, max_depth, max_iov_frags; + atomic_t queue_depth; + int head, tail, max_depth, max_iov_frags; short options; }; @@ -71,7 +73,6 @@ struct vector_estats { struct vector_private { struct list_head list; - spinlock_t lock; struct net_device *dev; struct napi_struct napi ____cacheline_aligned; diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index b16a5e5619d3..2ea67e6fd067 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -46,6 +46,9 @@ #define TRANS_FD "fd" #define TRANS_FD_LEN strlen(TRANS_FD) +#define TRANS_VDE "vde" +#define TRANS_VDE_LEN strlen(TRANS_VDE) + #define VNET_HDR_FAIL "could not enable vnet headers on fd %d" #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" @@ -434,6 +437,84 @@ fd_cleanup: return NULL; } +/* enough char to store an int type */ +#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2) +#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3) +/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */ +#define VDE_MAX_ARGC 12 +#define VDE_SEQPACKET_HEAD "seqpacket://" +#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1) +#define VDE_DEFAULT_DESCRIPTION "UML" + +static struct vector_fds *user_init_vde_fds(struct arglist *ifspec) +{ + char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1]; + char *argv[VDE_MAX_ARGC] = {"vde_plug"}; + int argc = 1; + int rv; + int sv[2]; + struct vector_fds *result = NULL; + + char *vnl = uml_vector_fetch_arg(ifspec,"vnl"); + char *descr = uml_vector_fetch_arg(ifspec,"descr"); + char *port = uml_vector_fetch_arg(ifspec,"port"); + char *mode = uml_vector_fetch_arg(ifspec,"mode"); + char *group = uml_vector_fetch_arg(ifspec,"group"); + if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION; + + argv[argc++] = "--descr"; + argv[argc++] = descr; + if (port != NULL) { + argv[argc++] = "--port2"; + argv[argc++] = port; + } + if (mode != NULL) { + argv[argc++] = "--mod2"; + argv[argc++] = mode; + } + if (group != NULL) { + argv[argc++] = "--group2"; + argv[argc++] = group; + } + argv[argc++] = seqpacketvnl; + argv[argc++] = vnl; + argv[argc++] = NULL; + + rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno); + return NULL; + } + rv = os_set_exec_close(sv[0]); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno); + goto vde_cleanup_sv; + } + snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]); + + run_helper(NULL, NULL, argv); + + close(sv[1]); + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto vde_cleanup; + } + + result->rx_fd = sv[0]; + result->tx_fd = sv[0]; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + +vde_cleanup_sv: + close(sv[1]); +vde_cleanup: + close(sv[0]); + return NULL; +} + static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) { int rxfd = -1, txfd = -1; @@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open( return user_init_unix_fds(parsed, ID_BESS); if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) return user_init_fd_fds(parsed); + if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0) + return user_init_vde_fds(parsed); return NULL; } diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 6f147cd3c9f7..fcfa3b7e021b 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,6 +10,7 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CONFIG 9 @@ -23,7 +24,8 @@ /* Supported transport features */ #define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) /* Supported protocol features */ -#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 97a37c062997..b83b5a765d4e 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -5,45 +5,19 @@ */ #include <linux/module.h> #include <linux/pci.h> -#include <linux/virtio.h> -#include <linux/virtio_config.h> #include <linux/logic_iomem.h> #include <linux/of_platform.h> #include <linux/irqdomain.h> -#include <linux/virtio_pcidev.h> -#include <linux/virtio-uml.h> -#include <linux/delay.h> #include <linux/msi.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <irq_kern.h> +#include "virt-pci.h" + #define MAX_DEVICES 8 #define MAX_MSI_VECTORS 32 #define CFG_SPACE_SIZE 4096 -/* for MSI-X we have a 32-bit payload */ -#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) -#define NUM_IRQ_MSGS 10 - -#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) -#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) - -struct um_pci_device { - struct virtio_device *vdev; - - /* for now just standard BARs */ - u8 resptr[PCI_STD_NUM_BARS]; - - struct virtqueue *cmd_vq, *irq_vq; - -#define UM_PCI_STAT_WAITING 0 - unsigned long status; - - int irq; - - bool platform; -}; - struct um_pci_device_reg { struct um_pci_device *dev; void __iomem *iomem; @@ -58,150 +32,15 @@ static struct irq_domain *um_pci_inner_domain; static struct irq_domain *um_pci_msi_domain; static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; -static unsigned int um_pci_max_delay_us = 40000; -module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); - -struct um_pci_message_buffer { - struct virtio_pcidev_msg hdr; - u8 data[8]; -}; - -static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; - -static int um_pci_send_cmd(struct um_pci_device *dev, - struct virtio_pcidev_msg *cmd, - unsigned int cmd_size, - const void *extra, unsigned int extra_size, - void *out, unsigned int out_size) -{ - struct scatterlist out_sg, extra_sg, in_sg; - struct scatterlist *sgs_list[] = { - [0] = &out_sg, - [1] = extra ? &extra_sg : &in_sg, - [2] = extra ? &in_sg : NULL, - }; - struct um_pci_message_buffer *buf; - int delay_count = 0; - int ret, len; - bool posted; - - if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) - return -EINVAL; - - switch (cmd->op) { - case VIRTIO_PCIDEV_OP_CFG_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_MEMSET: - /* in PCI, writes are posted, so don't wait */ - posted = !out; - WARN_ON(!posted); - break; - default: - posted = false; - break; - } - - buf = get_cpu_var(um_pci_msg_bufs); - if (buf) - memcpy(buf, cmd, cmd_size); - - if (posted) { - u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); - - if (ncmd) { - memcpy(ncmd, cmd, cmd_size); - if (extra) - memcpy(ncmd + cmd_size, extra, extra_size); - cmd = (void *)ncmd; - cmd_size += extra_size; - extra = NULL; - extra_size = 0; - } else { - /* try without allocating memory */ - posted = false; - cmd = (void *)buf; - } - } else { - cmd = (void *)buf; - } - - sg_init_one(&out_sg, cmd, cmd_size); - if (extra) - sg_init_one(&extra_sg, extra, extra_size); - if (out) - sg_init_one(&in_sg, out, out_size); - - /* add to internal virtio queue */ - ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, - extra ? 2 : 1, - out ? 1 : 0, - posted ? cmd : HANDLE_NO_FREE(cmd), - GFP_ATOMIC); - if (ret) { - if (posted) - kfree(cmd); - goto out; - } - - if (posted) { - virtqueue_kick(dev->cmd_vq); - ret = 0; - goto out; - } - - /* kick and poll for getting a response on the queue */ - set_bit(UM_PCI_STAT_WAITING, &dev->status); - virtqueue_kick(dev->cmd_vq); - - while (1) { - void *completed = virtqueue_get_buf(dev->cmd_vq, &len); - - if (completed == HANDLE_NO_FREE(cmd)) - break; - - if (completed && !HANDLE_IS_NO_FREE(completed)) - kfree(completed); - - if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || - ++delay_count > um_pci_max_delay_us, - "um virt-pci delay: %d", delay_count)) { - ret = -EIO; - break; - } - udelay(1); - } - clear_bit(UM_PCI_STAT_WAITING, &dev->status); - -out: - put_cpu_var(um_pci_msg_bufs); - return ret; -} - static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, int size) { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_READ, - .size = size, - .addr = offset, - }; - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - size_t bytes = sizeof(buf->data); if (!dev) return ULONG_MAX; - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; - - if (buf) - memset(data, 0xff, bytes); - switch (size) { case 1: case 2: @@ -212,34 +51,10 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - goto out; - } - - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) - goto out; - - switch (size) { - case 1: - ret = data[0]; - break; - case 2: - ret = le16_to_cpup((void *)data); - break; - case 4: - ret = le32_to_cpup((void *)data); - break; -#ifdef CONFIG_64BIT - case 8: - ret = le64_to_cpup((void *)data); - break; -#endif - default: - break; + return ULONG_MAX; } -out: - put_cpu_var(um_pci_msg_bufs); - return ret; + return dev->ops->cfgspace_read(dev, offset, size); } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -247,42 +62,24 @@ static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct { - struct virtio_pcidev_msg hdr; - /* maximum size - we may only use parts of it */ - u8 data[8]; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .size = size, - .addr = offset, - }, - }; if (!dev) return; switch (size) { case 1: - msg.data[0] = (u8)val; - break; case 2: - put_unaligned_le16(val, (void *)msg.data); - break; case 4: - put_unaligned_le32(val, (void *)msg.data); - break; #ifdef CONFIG_64BIT case 8: - put_unaligned_le64(val, (void *)msg.data); - break; #endif + break; default: WARN(1, "invalid config space write size %d\n", size); return; } - WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); + dev->ops->cfgspace_write(dev, offset, size, val); } static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { @@ -290,35 +87,14 @@ static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { .write = um_pci_cfgspace_write, }; -static void um_pci_bar_copy_from(void *priv, void *buffer, - unsigned int offset, int size) +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) { u8 *resptr = priv; struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_READ, - .bar = *resptr, - .size = size, - .addr = offset, - }; - - memset(buffer, 0xff, size); - - um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); -} - -static unsigned long um_pci_bar_read(void *priv, unsigned int offset, - int size) -{ - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; + u8 bar = *resptr; switch (size) { case 1: @@ -329,80 +105,60 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, #endif break; default: - WARN(1, "invalid config space read size %d\n", size); - goto out; + WARN(1, "invalid bar read size %d\n", size); + return ULONG_MAX; } - um_pci_bar_copy_from(priv, data, offset, size); + return dev->ops->bar_read(dev, bar, offset, size); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; switch (size) { case 1: - ret = data[0]; - break; case 2: - ret = le16_to_cpup((void *)data); - break; case 4: - ret = le32_to_cpup((void *)data); - break; #ifdef CONFIG_64BIT case 8: - ret = le64_to_cpup((void *)data); - break; #endif - default: break; + default: + WARN(1, "invalid bar write size %d\n", size); + return; } -out: - put_cpu_var(um_pci_msg_bufs); - return ret; + dev->ops->bar_write(dev, bar, offset, size, val); } -static void um_pci_bar_copy_to(void *priv, unsigned int offset, - const void *buffer, int size) +static void um_pci_bar_copy_from(void *priv, void *buffer, + unsigned int offset, int size) { u8 *resptr = priv; struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); + dev->ops->bar_copy_from(dev, bar, buffer, offset, size); } -static void um_pci_bar_write(void *priv, unsigned int offset, int size, - unsigned long val) +static void um_pci_bar_copy_to(void *priv, unsigned int offset, + const void *buffer, int size) { - /* maximum size - we may only use parts of it */ - u8 data[8]; - - switch (size) { - case 1: - data[0] = (u8)val; - break; - case 2: - put_unaligned_le16(val, (void *)data); - break; - case 4: - put_unaligned_le32(val, (void *)data); - break; -#ifdef CONFIG_64BIT - case 8: - put_unaligned_le64(val, (void *)data); - break; -#endif - default: - WARN(1, "invalid config space write size %d\n", size); - return; - } + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; - um_pci_bar_copy_to(priv, offset, data, size); + dev->ops->bar_copy_to(dev, bar, offset, buffer, size); } static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) @@ -411,20 +167,9 @@ static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct { - struct virtio_pcidev_msg hdr; - u8 data; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }, - .data = value, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); + dev->ops->bar_set(dev, bar, offset, value, size); } static const struct logic_iomem_ops um_pci_device_bar_ops = { @@ -471,79 +216,6 @@ static void um_pci_rescan(void) pci_unlock_rescan_remove(); } -static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) -{ - struct scatterlist sg[1]; - - sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); - if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) - kfree(buf); - else if (kick) - virtqueue_kick(vq); -} - -static void um_pci_handle_irq_message(struct virtqueue *vq, - struct virtio_pcidev_msg *msg) -{ - struct virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - - if (!dev->irq) - return; - - /* we should properly chain interrupts, but on ARCH=um we don't care */ - - switch (msg->op) { - case VIRTIO_PCIDEV_OP_INT: - generic_handle_irq(dev->irq); - break; - case VIRTIO_PCIDEV_OP_MSI: - /* our MSI message is just the interrupt number */ - if (msg->size == sizeof(u32)) - generic_handle_irq(le32_to_cpup((void *)msg->data)); - else - generic_handle_irq(le16_to_cpup((void *)msg->data)); - break; - case VIRTIO_PCIDEV_OP_PME: - /* nothing to do - we already woke up due to the message */ - break; - default: - dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); - break; - } -} - -static void um_pci_cmd_vq_cb(struct virtqueue *vq) -{ - struct virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - void *cmd; - int len; - - if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) - return; - - while ((cmd = virtqueue_get_buf(vq, &len))) { - if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) - continue; - kfree(cmd); - } -} - -static void um_pci_irq_vq_cb(struct virtqueue *vq) -{ - struct virtio_pcidev_msg *msg; - int len; - - while ((msg = virtqueue_get_buf(vq, &len))) { - if (len >= sizeof(*msg)) - um_pci_handle_irq_message(vq, msg); - - /* recycle the message buffer */ - um_pci_irq_vq_addbuf(vq, msg, true); - } -} - #ifdef CONFIG_OF /* Copied from arch/x86/kernel/devicetree.c */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) @@ -565,199 +237,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) } #endif -static int um_pci_init_vqs(struct um_pci_device *dev) -{ - struct virtqueue *vqs[2]; - static const char *const names[2] = { "cmd", "irq" }; - vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb }; - int err, i; - - err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL); - if (err) - return err; - - dev->cmd_vq = vqs[0]; - dev->irq_vq = vqs[1]; - - virtio_device_ready(dev->vdev); - - for (i = 0; i < NUM_IRQ_MSGS; i++) { - void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); - - if (msg) - um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); - } - - virtqueue_kick(dev->irq_vq); - - return 0; -} - -static void __um_pci_virtio_platform_remove(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); - - mutex_lock(&um_pci_mtx); - um_pci_platform_device = NULL; - mutex_unlock(&um_pci_mtx); - - kfree(dev); -} - -static int um_pci_virtio_platform_probe(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - int ret; - - dev->platform = true; - - mutex_lock(&um_pci_mtx); - - if (um_pci_platform_device) { - mutex_unlock(&um_pci_mtx); - ret = -EBUSY; - goto out_free; - } - - ret = um_pci_init_vqs(dev); - if (ret) { - mutex_unlock(&um_pci_mtx); - goto out_free; - } - - um_pci_platform_device = dev; - - mutex_unlock(&um_pci_mtx); - - ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); - if (ret) - __um_pci_virtio_platform_remove(vdev, dev); - - return ret; - -out_free: - kfree(dev); - return ret; -} - -static int um_pci_virtio_probe(struct virtio_device *vdev) -{ - struct um_pci_device *dev; - int i, free = -1; - int err = -ENOSPC; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - dev->vdev = vdev; - vdev->priv = dev; - - if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) - return um_pci_virtio_platform_probe(vdev, dev); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev) - continue; - free = i; - break; - } - - if (free < 0) - goto error; - - err = um_pci_init_vqs(dev); - if (err) - goto error; - - dev->irq = irq_alloc_desc(numa_node_id()); - if (dev->irq < 0) { - err = dev->irq; - goto err_reset; - } - um_pci_devices[free].dev = dev; - vdev->priv = dev; - - mutex_unlock(&um_pci_mtx); - - device_set_wakeup_enable(&vdev->dev, true); - - /* - * In order to do suspend-resume properly, don't allow VQs - * to be suspended. - */ - virtio_uml_set_no_vq_suspend(vdev, true); - - um_pci_rescan(); - return 0; -err_reset: - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); -error: - mutex_unlock(&um_pci_mtx); - kfree(dev); - return err; -} - -static void um_pci_virtio_remove(struct virtio_device *vdev) -{ - struct um_pci_device *dev = vdev->priv; - int i; - - if (dev->platform) { - of_platform_depopulate(&vdev->dev); - __um_pci_virtio_platform_remove(vdev, dev); - return; - } - - device_set_wakeup_enable(&vdev->dev, false); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev != dev) - continue; - - um_pci_devices[i].dev = NULL; - irq_free_desc(dev->irq); - - break; - } - mutex_unlock(&um_pci_mtx); - - if (i < MAX_DEVICES) { - struct pci_dev *pci_dev; - - pci_dev = pci_get_slot(bridge->bus, i); - if (pci_dev) - pci_stop_and_remove_bus_device_locked(pci_dev); - } - - /* Stop all virtqueues */ - virtio_reset_device(vdev); - dev->cmd_vq = NULL; - dev->irq_vq = NULL; - vdev->config->del_vqs(vdev); - - kfree(dev); -} - -static struct virtio_device_id id_table[] = { - { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; -MODULE_DEVICE_TABLE(virtio, id_table); - -static struct virtio_driver um_pci_virtio_driver = { - .driver.name = "virtio-pci", - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = um_pci_virtio_probe, - .remove = um_pci_virtio_remove, -}; - static struct resource virt_cfgspace_resource = { .name = "PCI config space", .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, @@ -876,7 +355,7 @@ static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) } static struct irq_chip um_pci_msi_bottom_irq_chip = { - .name = "UM virtio MSI", + .name = "UM virtual MSI", .irq_compose_msi_msg = um_pci_compose_msi_msg, }; @@ -926,7 +405,7 @@ static const struct irq_domain_ops um_pci_inner_domain_ops = { }; static struct irq_chip um_pci_msi_irq_chip = { - .name = "UM virtio PCIe MSI", + .name = "UM virtual PCIe MSI", .irq_mask = pci_msi_mask_irq, .irq_unmask = pci_msi_unmask_irq, }; @@ -985,8 +464,85 @@ static struct resource virt_platform_resource = { .flags = IORESOURCE_MEM, }; +int um_pci_device_register(struct um_pci_device *dev) +{ + int i, free = -1; + int err = 0; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) { + err = -ENOSPC; + goto out; + } + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto out; + } + + um_pci_devices[free].dev = dev; + +out: + mutex_unlock(&um_pci_mtx); + if (!err) + um_pci_rescan(); + return err; +} + +void um_pci_device_unregister(struct um_pci_device *dev) +{ + int i; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + break; + } + mutex_unlock(&um_pci_mtx); + + if (i < MAX_DEVICES) { + struct pci_dev *pci_dev; + + pci_dev = pci_get_slot(bridge->bus, i); + if (pci_dev) + pci_stop_and_remove_bus_device_locked(pci_dev); + } +} + +int um_pci_platform_device_register(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device) + return -EBUSY; + um_pci_platform_device = dev; + return 0; +} + +void um_pci_platform_device_unregister(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device == dev) + um_pci_platform_device = NULL; +} + static int __init um_pci_init(void) { + struct irq_domain_info inner_domain_info = { + .size = MAX_MSI_VECTORS, + .hwirq_max = MAX_MSI_VECTORS, + .ops = &um_pci_inner_domain_ops, + }; int err, i; WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, @@ -996,14 +552,6 @@ static int __init um_pci_init(void) WARN_ON(logic_iomem_add_region(&virt_platform_resource, &um_pci_platform_ops)); - if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, - "No virtio device ID configured for PCI - no PCI support\n")) - return 0; - - um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); - if (!um_pci_msg_bufs) - return -ENOMEM; - bridge = pci_alloc_host_bridge(0); if (!bridge) { err = -ENOMEM; @@ -1016,11 +564,10 @@ static int __init um_pci_init(void) goto free; } - um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS, - MAX_MSI_VECTORS, 0, - &um_pci_inner_domain_ops, NULL); - if (!um_pci_inner_domain) { - err = -ENOMEM; + inner_domain_info.fwnode = um_pci_fwnode; + um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info); + if (IS_ERR(um_pci_inner_domain)) { + err = PTR_ERR(um_pci_inner_domain); goto free; } @@ -1052,12 +599,10 @@ static int __init um_pci_init(void) if (err) goto free; - err = register_virtio_driver(&um_pci_virtio_driver); - if (err) - goto free; return 0; + free: - if (um_pci_inner_domain) + if (!IS_ERR_OR_NULL(um_pci_inner_domain)) irq_domain_remove(um_pci_inner_domain); if (um_pci_fwnode) irq_domain_free_fwnode(um_pci_fwnode); @@ -1065,18 +610,15 @@ free: pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); } - free_percpu(um_pci_msg_bufs); return err; } -module_init(um_pci_init); +device_initcall(um_pci_init); static void __exit um_pci_exit(void) { - unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); - free_percpu(um_pci_msg_bufs); } module_exit(um_pci_exit); diff --git a/arch/um/drivers/virt-pci.h b/arch/um/drivers/virt-pci.h new file mode 100644 index 000000000000..b20d1475d1eb --- /dev/null +++ b/arch/um/drivers/virt-pci.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VIRT_PCI_H +#define __UM_VIRT_PCI_H + +#include <linux/pci.h> + +struct um_pci_device { + const struct um_pci_ops *ops; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + int irq; +}; + +struct um_pci_ops { + unsigned long (*cfgspace_read)(struct um_pci_device *dev, + unsigned int offset, int size); + void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset, + int size, unsigned long val); + + unsigned long (*bar_read)(struct um_pci_device *dev, int bar, + unsigned int offset, int size); + void (*bar_write)(struct um_pci_device *dev, int bar, + unsigned int offset, int size, unsigned long val); + + void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer, + unsigned int offset, int size); + void (*bar_copy_to)(struct um_pci_device *dev, int bar, + unsigned int offset, const void *buffer, int size); + void (*bar_set)(struct um_pci_device *dev, int bar, + unsigned int offset, u8 value, int size); +}; + +int um_pci_device_register(struct um_pci_device *dev); +void um_pci_device_unregister(struct um_pci_device *dev); + +int um_pci_platform_device_register(struct um_pci_device *dev); +void um_pci_platform_device_unregister(struct um_pci_device *dev); + +#endif /* __UM_VIRT_PCI_H */ diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c new file mode 100644 index 000000000000..3c4c4c928fdd --- /dev/null +++ b/arch/um/drivers/virtio_pcidev.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/logic_iomem.h> +#include <linux/of_platform.h> +#include <linux/irqdomain.h> +#include <linux/virtio_pcidev.h> +#include <linux/virtio-uml.h> +#include <linux/delay.h> +#include <linux/msi.h> +#include <linux/unaligned.h> +#include <irq_kern.h> + +#include "virt-pci.h" + +#define to_virtio_pcidev(_pdev) \ + container_of(_pdev, struct virtio_pcidev_device, pdev) + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +struct virtio_pcidev_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +struct virtio_pcidev_device { + struct um_pci_device pdev; + struct virtio_device *vdev; + + struct virtqueue *cmd_vq, *irq_vq; + +#define VIRTIO_PCIDEV_WRITE_BUFS 20 + struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS); + +#define UM_PCI_STAT_WAITING 0 + unsigned long status; + + bool platform; +}; + +static unsigned int virtio_pcidev_max_delay_us = 40000; +module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644); + +static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } + + *posted = false; + return VIRTIO_PCIDEV_WRITE_BUFS; +} + +static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) { + kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]); + dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} + +static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? &in_sg : NULL, + }; + struct virtio_pcidev_message_buffer *buf; + int delay_count = 0; + bool bounce_out; + int ret, len; + int buf_idx; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); + + buf_idx = virtio_pcidev_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + if (posted && extra && extra_size > sizeof(buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + virtio_pcidev_free_buf(dev, buf); + return -ENOMEM; + } + extra = dev->extra_ptrs[buf_idx]; + } else if (extra && extra_size <= sizeof(buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; + } else { + cmd = (void *)buf; + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 1 : 0, + cmd, GFP_ATOMIC); + if (ret) { + virtio_pcidev_free_buf(dev, buf); + return ret; + } + + if (posted) { + virtqueue_kick(dev->cmd_vq); + return 0; + } + + /* kick and poll for getting a response on the queue */ + set_bit(UM_PCI_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + ret = 0; + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == buf) + break; + + if (completed) + virtio_pcidev_free_buf(dev, completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > virtio_pcidev_max_delay_us, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(UM_PCI_STAT_WAITING, &dev->status); + + if (bounce_out) + memcpy(out, buf->data, out_size); + + virtio_pcidev_free_buf(dev, buf); + + return ret; +} + +static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* max 8, we might not use it all */ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + /* size has been checked in um_pci_cfgspace_read() */ + if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = size, + .addr = offset, + }, + }; + + /* size has been checked in um_pci_cfgspace_write() */ + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + } + + WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev, + int bar, void *buffer, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = bar, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_read() */ + virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev, + int bar, unsigned int offset, + const void *buffer, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }; + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_write() */ + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size); +} + +static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }, + .data = value, + }; + + virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct um_pci_ops virtio_pcidev_um_pci_ops = { + .cfgspace_read = virtio_pcidev_cfgspace_read, + .cfgspace_write = virtio_pcidev_cfgspace_write, + .bar_read = virtio_pcidev_bar_read, + .bar_write = virtio_pcidev_bar_write, + .bar_copy_from = virtio_pcidev_bar_copy_from, + .bar_copy_to = virtio_pcidev_bar_copy_to, + .bar_set = virtio_pcidev_bar_set, +}; + +static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void virtio_pcidev_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + + if (!dev->pdev.irq) + return; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->pdev.irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) + virtio_pcidev_free_buf(dev, cmd); +} + +static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + virtio_pcidev_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + virtio_pcidev_irq_vq_addbuf(vq, msg, true); + } +} + +static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev) +{ + struct virtqueue_info vqs_info[] = { + { "cmd", virtio_pcidev_cmd_vq_cb }, + { "irq", virtio_pcidev_irq_vq_cb }, + }; + struct virtqueue *vqs[2]; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + virtio_device_ready(dev->vdev); + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + um_pci_platform_device_unregister(&dev->pdev); + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + int err; + + dev->platform = true; + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_platform_device_register(&dev->pdev); + if (err) + goto err_reset; + + err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); + if (err) + goto err_unregister; + + return 0; + +err_unregister: + um_pci_platform_device_unregister(&dev->pdev); +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static int virtio_pcidev_virtio_probe(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + dev->pdev.ops = &virtio_pcidev_um_pci_ops; + + if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) + return virtio_pcidev_virtio_platform_probe(vdev, dev); + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_device_register(&dev->pdev); + if (err) + goto err_reset; + + device_set_wakeup_enable(&vdev->dev, true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. + */ + virtio_uml_set_no_vq_suspend(vdev, true); + + return 0; + +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev = vdev->priv; + + if (dev->platform) { + of_platform_depopulate(&vdev->dev); + __virtio_pcidev_virtio_platform_remove(vdev, dev); + return; + } + + device_set_wakeup_enable(&vdev->dev, false); + + um_pci_device_unregister(&dev->pdev); + + /* Stop all virtqueues */ + virtio_reset_device(vdev); + dev->cmd_vq = NULL; + dev->irq_vq = NULL; + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver virtio_pcidev_virtio_driver = { + .driver.name = "virtio-pci", + .id_table = id_table, + .probe = virtio_pcidev_virtio_probe, + .remove = virtio_pcidev_virtio_remove, +}; + +static int __init virtio_pcidev_init(void) +{ + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + return register_virtio_driver(&virtio_pcidev_virtio_driver); +} +late_initcall(virtio_pcidev_init); + +static void __exit virtio_pcidev_exit(void) +{ + unregister_virtio_driver(&virtio_pcidev_virtio_driver); +} +module_exit(virtio_pcidev_exit); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 8adca2000e51..ad8d78fb1d9a 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -52,10 +52,11 @@ struct virtio_uml_device { struct platform_device *pdev; struct virtio_uml_platform_data *pdata; - spinlock_t sock_lock; + raw_spinlock_t sock_lock; int sock, req_fd, irq; u64 features; u64 protocol_features; + u64 max_vqs; u8 status; u8 registered:1; u8 suspended:1; @@ -72,8 +73,6 @@ struct virtio_uml_vq_info { bool suspended; }; -extern unsigned long long physmem_size, highmem; - #define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) /* Vhost-user protocol */ @@ -247,7 +246,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; - spin_lock_irqsave(&vu_dev->sock_lock, flags); + raw_spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) goto out; @@ -267,7 +266,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, } out: - spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags); return rc; } @@ -343,6 +342,17 @@ static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, protocol_features); } +static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev, + u64 *queue_num) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_QUEUE_NUM); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, queue_num); +} + static void vhost_user_reply(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, int response) { @@ -516,6 +526,15 @@ static int vhost_user_init(struct virtio_uml_device *vu_dev) return rc; } + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) { + rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs); + if (rc) + return rc; + } else { + vu_dev->max_vqs = U64_MAX; + } + return 0; } @@ -625,7 +644,7 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_MEM_TABLE, - .header.size = sizeof(msg.payload.mem_regions), + .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]), .payload.mem_regions.num = 1, }; unsigned long reserved = uml_reserved - uml_physmem; @@ -673,13 +692,6 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) if (rc < 0) return rc; - if (highmem) { - msg.payload.mem_regions.num++; - rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, - &fds[1], &msg.payload.mem_regions.regions[1]); - if (rc < 0) - return rc; - } return vhost_user_send(vu_dev, false, &msg, fds, msg.payload.mem_regions.num); @@ -897,7 +909,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, { struct virtio_uml_vq_info *info = vq->priv; int call_fds[2]; - int rc; + int rc, irq; /* no call FD needed/desired in this case */ if (vu_dev->protocol_features & @@ -914,19 +926,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, return rc; info->call_fd = call_fds[0]; - rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, - vu_interrupt, IRQF_SHARED, info->name, vq); - if (rc < 0) + irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (irq < 0) { + rc = irq; goto close_both; + } rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); if (rc) goto release_irq; + vu_dev->irq = irq; + goto out; release_irq: - um_free_irq(vu_dev->irq, vq); + um_free_irq(irq, vq); close_both: os_close_file(call_fds[0]); out: @@ -1014,8 +1030,8 @@ error_kzalloc: } static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); @@ -1023,7 +1039,9 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vq; /* not supported for now */ - if (WARN_ON(nvqs > 64)) + if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs, + "%d VQs requested, only up to 64 or %lld supported\n", + nvqs, vu_dev->max_vqs)) return -EINVAL; rc = vhost_user_set_mem_table(vu_dev); @@ -1031,13 +1049,15 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, return rc; for (i = 0; i < nvqs; ++i) { - if (!names[i]) { + struct virtqueue_info *vqi = &vqs_info[i]; + + if (!vqi->name) { vqs[i] = NULL; continue; } - vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i], - ctx ? ctx[i] : false); + vqs[i] = vu_setup_vq(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx); if (IS_ERR(vqs[i])) { rc = PTR_ERR(vqs[i]); goto error_setup; @@ -1208,6 +1228,7 @@ static int virtio_uml_probe(struct platform_device *pdev) vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; vu_dev->pdev = pdev; vu_dev->req_fd = -1; + vu_dev->irq = UM_IRQ_ALLOC; time_travel_propagate_time(); @@ -1218,7 +1239,7 @@ static int virtio_uml_probe(struct platform_device *pdev) goto error_free; vu_dev->sock = rc; - spin_lock_init(&vu_dev->sock_lock); + raw_spin_lock_init(&vu_dev->sock_lock); rc = vhost_user_init(vu_dev); if (rc) @@ -1241,12 +1262,11 @@ error_free: return rc; } -static int virtio_uml_remove(struct platform_device *pdev) +static void virtio_uml_remove(struct platform_device *pdev) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); unregister_virtio_device(&vu_dev->vdev); - return 0; } /* Command line device list */ diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index 6918de5e2956..e4316c7981e8 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d, new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; - printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", + printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n", -err); goto out_kill; } diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 8011e51993d5..3971252cb1a6 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -21,12 +21,19 @@ struct xterm_wait { static irqreturn_t xterm_interrupt(int irq, void *data) { struct xterm_wait *xterm = data; - int fd; + int fd = -1, n_fds = 1; + ssize_t ret; - fd = os_rcv_fd(xterm->fd, &xterm->pid); - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds, + &xterm->pid, sizeof(xterm->pid)); + if (ret == -EAGAIN) return IRQ_NONE; + if (ret < 0) + fd = ret; + else if (ret != sizeof(xterm->pid)) + fd = -EMSGSIZE; + xterm->new_fd = fd; complete(&xterm->ready); |