From 2202844e4468c7539dba0c0b06577c93735af952 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 6 Nov 2023 15:22:23 +0800 Subject: vfio/migration: Add debugfs to live migration driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multiple devices, software and operational steps involved in the process of live migration. An error occurred on any node may cause the live migration operation to fail. This complex process makes it very difficult to locate and analyze the cause when the function fails. In order to quickly locate the cause of the problem when the live migration fails, I added a set of debugfs to the vfio live migration driver. +-------------------------------------------+ | | | | | QEMU | | | | | +---+----------------------------+----------+ | ^ | ^ | | | | | | | | v | v | +---------+--+ +---------+--+ |src vfio_dev| |dst vfio_dev| +--+---------+ +--+---------+ | ^ | ^ | | | | v | | | +-----------+----+ +-----------+----+ |src dev debugfs | |dst dev debugfs | +----------------+ +----------------+ The entire debugfs directory will be based on the definition of the CONFIG_DEBUG_FS macro. If this macro is not enabled, the interfaces in vfio.h will be empty definitions, and the creation and initialization of the debugfs directory will not be executed. vfio | +--- | +---migration | +--state | +--- +---migration +--state debugfs will create a public root directory "vfio" file. then create a dev_name() file for each live migration device. First, create a unified state acquisition file of "migration" in this device directory. Then, create a public live migration state lookup file "state". Signed-off-by: Longfang Liu Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/r/20231106072225.28577-2-liulongfang@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 10 ++++++ drivers/vfio/Makefile | 1 + drivers/vfio/debugfs.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio.h | 14 ++++++++ drivers/vfio/vfio_main.c | 4 +++ 5 files changed, 121 insertions(+) create mode 100644 drivers/vfio/debugfs.c (limited to 'drivers/vfio') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 6bda6dbb4878..ceae52fd7586 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -80,6 +80,16 @@ config VFIO_VIRQFD select EVENTFD default n +config VFIO_DEBUGFS + bool "Export VFIO internals in DebugFS" + depends on DEBUG_FS + help + Allows exposure of VFIO device internals. This option enables + the use of debugfs by VFIO drivers as required. The device can + cause the VFIO code create a top-level debug/vfio directory + during initialization, and then populate a subdirectory with + entries as required. + source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 68c05705200f..b2fc9fb499d8 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -7,6 +7,7 @@ vfio-$(CONFIG_VFIO_GROUP) += group.o vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o +vfio-$(CONFIG_VFIO_DEBUGFS) += debugfs.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o diff --git a/drivers/vfio/debugfs.c b/drivers/vfio/debugfs.c new file mode 100644 index 000000000000..298bd866f157 --- /dev/null +++ b/drivers/vfio/debugfs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, HiSilicon Ltd. + */ + +#include +#include +#include +#include +#include "vfio.h" + +static struct dentry *vfio_debugfs_root; + +static int vfio_device_state_read(struct seq_file *seq, void *data) +{ + struct device *vf_dev = seq->private; + struct vfio_device *vdev = container_of(vf_dev, + struct vfio_device, device); + enum vfio_device_mig_state state; + int ret; + + BUILD_BUG_ON(VFIO_DEVICE_STATE_NR != + VFIO_DEVICE_STATE_PRE_COPY_P2P + 1); + + ret = vdev->mig_ops->migration_get_state(vdev, &state); + if (ret) + return -EINVAL; + + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + seq_puts(seq, "ERROR\n"); + break; + case VFIO_DEVICE_STATE_STOP: + seq_puts(seq, "STOP\n"); + break; + case VFIO_DEVICE_STATE_RUNNING: + seq_puts(seq, "RUNNING\n"); + break; + case VFIO_DEVICE_STATE_STOP_COPY: + seq_puts(seq, "STOP_COPY\n"); + break; + case VFIO_DEVICE_STATE_RESUMING: + seq_puts(seq, "RESUMING\n"); + break; + case VFIO_DEVICE_STATE_RUNNING_P2P: + seq_puts(seq, "RUNNING_P2P\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY: + seq_puts(seq, "PRE_COPY\n"); + break; + case VFIO_DEVICE_STATE_PRE_COPY_P2P: + seq_puts(seq, "PRE_COPY_P2P\n"); + break; + default: + seq_puts(seq, "Invalid\n"); + } + + return 0; +} + +void vfio_device_debugfs_init(struct vfio_device *vdev) +{ + struct device *dev = &vdev->device; + + vdev->debug_root = debugfs_create_dir(dev_name(vdev->dev), + vfio_debugfs_root); + + if (vdev->mig_ops) { + struct dentry *vfio_dev_migration = NULL; + + vfio_dev_migration = debugfs_create_dir("migration", + vdev->debug_root); + debugfs_create_devm_seqfile(dev, "state", vfio_dev_migration, + vfio_device_state_read); + } +} + +void vfio_device_debugfs_exit(struct vfio_device *vdev) +{ + debugfs_remove_recursive(vdev->debug_root); +} + +void vfio_debugfs_create_root(void) +{ + vfio_debugfs_root = debugfs_create_dir("vfio", NULL); +} + +void vfio_debugfs_remove_root(void) +{ + debugfs_remove_recursive(vfio_debugfs_root); + vfio_debugfs_root = NULL; +} diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 307e3f29b527..bde84ad344e5 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -448,4 +448,18 @@ static inline void vfio_device_put_kvm(struct vfio_device *device) } #endif +#ifdef CONFIG_VFIO_DEBUGFS +void vfio_debugfs_create_root(void); +void vfio_debugfs_remove_root(void); + +void vfio_device_debugfs_init(struct vfio_device *vdev); +void vfio_device_debugfs_exit(struct vfio_device *vdev); +#else +static inline void vfio_debugfs_create_root(void) { } +static inline void vfio_debugfs_remove_root(void) { } + +static inline void vfio_device_debugfs_init(struct vfio_device *vdev) { } +static inline void vfio_device_debugfs_exit(struct vfio_device *vdev) { } +#endif /* CONFIG_VFIO_DEBUGFS */ + #endif diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 8d4995ada74a..1cc93aac99a2 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -311,6 +311,7 @@ static int __vfio_register_dev(struct vfio_device *device, refcount_set(&device->refcount, 1); vfio_device_group_register(device); + vfio_device_debugfs_init(device); return 0; err_out: @@ -378,6 +379,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } + vfio_device_debugfs_exit(device); /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } @@ -1676,6 +1678,7 @@ static int __init vfio_init(void) if (ret) goto err_alloc_dev_chrdev; + vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; @@ -1691,6 +1694,7 @@ err_virqfd: static void __exit vfio_cleanup(void) { + vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); vfio_cdev_cleanup(); class_destroy(vfio.device_class); -- cgit v1.2.3 From 4004497cec3093d7b0087bc70709b45969fa07b6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:02 -0800 Subject: vfio/pds: Fix calculations in pds_vfio_dirty_sync The incorrect check is being done for comparing the iova/length being requested to sync. This can cause the dirty sync operation to fail. Fix this by making sure the iova offset added to the requested sync length doesn't exceed the region_size. Also, the region_start is assumed to always be at 0. This can cause dirty tracking to fail because the device/driver bitmap offset always starts at 0, however, the region_start/iova may not. Fix this by determining the iova offset from region_start to determine the bitmap offset. Fixes: f232836a9152 ("vfio/pds: Add support for dirty page tracking") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-2-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index c937aa6f3954..27607d7b9030 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -478,8 +478,7 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, pages, bitmap_size); - if (!length || ((dirty->region_start + iova + length) > - (dirty->region_start + dirty->region_size))) { + if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; @@ -496,7 +495,8 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP(iova / dirty->region_page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) / + dirty->region_page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", -- cgit v1.2.3 From 3b8f7a24d1fe79cc3fa1da9be5610b9c3aedbc2a Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:03 -0800 Subject: vfio/pds: Only use a single SGL for both seq and ack Since the seq/ack operations never happen in parallel there is no need for multiple scatter gather lists per region. The current implementation is wasting memory. Fix this by only using a single scatter-gather list for both the seq and ack operations. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-3-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 68 ++++++++++++++++---------------------------- drivers/vfio/pci/pds/dirty.h | 6 ++-- 2 files changed, 28 insertions(+), 46 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 27607d7b9030..4462f6edb0ed 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -100,35 +100,35 @@ static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info) + struct pds_vfio_dirty *dirty) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, bmp_info->sgl_addr, - bmp_info->num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, dirty->sgl_addr, + dirty->num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(bmp_info->sgl); + kfree(dirty->sgl); - bmp_info->num_sge = 0; - bmp_info->sgl = NULL; - bmp_info->sgl_addr = 0; + dirty->num_sge = 0; + dirty->sgl = NULL; + dirty->sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - if (pds_vfio->dirty.host_seq.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq); - if (pds_vfio->dirty.host_ack.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack); + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + if (dirty->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, dirty); } -static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_bmp_info *bmp_info, - u32 page_count) +static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, + u32 page_count) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; size_t sgl_size; @@ -147,30 +147,9 @@ static int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - bmp_info->sgl = sgl; - bmp_info->num_sge = max_sge; - bmp_info->sgl_addr = sgl_addr; - - return 0; -} - -static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, - u32 page_count) -{ - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - int err; - - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq, - page_count); - if (err) - return err; - - err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack, - page_count); - if (err) { - __pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq); - return err; - } + dirty->sgl = sgl; + dirty->num_sge = max_sge; + dirty->sgl_addr = sgl_addr; return 0; } @@ -328,6 +307,8 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_lm_sg_elem *sgl; unsigned long long npages; struct sg_table sg_table; struct scatterlist *sg; @@ -374,8 +355,9 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, if (err) goto out_free_sg_table; + sgl = pds_vfio->dirty.sgl; for_each_sgtable_dma_sg(&sg_table, sg, i) { - struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i]; + struct pds_lm_sg_elem *sg_elem = &sgl[i]; sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); sg_elem->len = cpu_to_le32(sg_dma_len(sg)); @@ -383,15 +365,15 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); - dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); - err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge, + dma_sync_single_for_device(pdsc_dev, dirty->sgl_addr, size, dma_dir); + err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, dirty->sgl_addr, num_sge, offset, bmp_bytes, read_seq); if (err) dev_err(&pdev->dev, "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", bmp_type_str, offset, bmp_bytes, - num_sge, bmp_info->sgl_addr, ERR_PTR(err)); - dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir); + num_sge, dirty->sgl_addr, ERR_PTR(err)); + dma_sync_single_for_cpu(pdsc_dev, dirty->sgl_addr, size, dma_dir); dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); out_free_sg_table: diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index f78da25d75ca..9de5aac58190 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -7,9 +7,6 @@ struct pds_vfio_bmp_info { unsigned long *bmp; u32 bmp_bytes; - struct pds_lm_sg_elem *sgl; - dma_addr_t sgl_addr; - u16 num_sge; }; struct pds_vfio_dirty { @@ -18,6 +15,9 @@ struct pds_vfio_dirty { u64 region_size; u64 region_start; u64 region_page_size; + struct pds_lm_sg_elem *sgl; + dma_addr_t sgl_addr; + u16 num_sge; bool is_enabled; }; -- cgit v1.2.3 From 3f5898133a706402180908b671d5e781fc48f548 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:04 -0800 Subject: vfio/pds: Move and rename region specific info An upcoming change in this series will add support for multiple regions. To prepare for that, move region specific information into struct pds_vfio_region and rename the members for readability. This will reduce the size of the patch that actually implements multiple region support. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-4-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 83 ++++++++++++++++++++++---------------------- drivers/vfio/pci/pds/dirty.h | 12 ++++--- 2 files changed, 50 insertions(+), 45 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 4462f6edb0ed..8a7f9eed4e59 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -85,18 +85,18 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->host_seq.bmp = host_seq_bmp; - dirty->host_ack.bmp = host_ack_bmp; + dirty->region.host_seq.bmp = host_seq_bmp; + dirty->region.host_ack.bmp = host_ack_bmp; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->host_seq.bmp); - vfree(dirty->host_ack.bmp); - dirty->host_seq.bmp = NULL; - dirty->host_ack.bmp = NULL; + vfree(dirty->region.host_seq.bmp); + vfree(dirty->region.host_ack.bmp); + dirty->region.host_seq.bmp = NULL; + dirty->region.host_ack.bmp = NULL; } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -105,21 +105,21 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, dirty->sgl_addr, - dirty->num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, dirty->region.sgl_addr, + dirty->region.num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(dirty->sgl); + kfree(dirty->region.sgl); - dirty->num_sge = 0; - dirty->sgl = NULL; - dirty->sgl_addr = 0; + dirty->region.num_sge = 0; + dirty->region.sgl = NULL; + dirty->region.sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - if (dirty->sgl) + if (dirty->region.sgl) __pds_vfio_dirty_free_sgl(pds_vfio, dirty); } @@ -147,9 +147,9 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - dirty->sgl = sgl; - dirty->num_sge = max_sge; - dirty->sgl_addr = sgl_addr; + dirty->region.sgl = sgl; + dirty->region.num_sge = max_sge; + dirty->region.sgl_addr = sgl_addr; return 0; } @@ -267,9 +267,9 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_bitmaps; } - dirty->region_start = region_start; - dirty->region_size = region_size; - dirty->region_page_size = region_page_size; + dirty->region.start = region_start; + dirty->region.size = region_size; + dirty->region.page_size = region_page_size; pds_vfio_dirty_set_enabled(pds_vfio); pds_vfio_print_guest_region_info(pds_vfio, max_regions); @@ -304,11 +304,10 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; + struct pds_vfio_region *region = &pds_vfio->dirty.region; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - struct pds_lm_sg_elem *sgl; unsigned long long npages; struct sg_table sg_table; struct scatterlist *sg; @@ -355,9 +354,8 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, if (err) goto out_free_sg_table; - sgl = pds_vfio->dirty.sgl; for_each_sgtable_dma_sg(&sg_table, sg, i) { - struct pds_lm_sg_elem *sg_elem = &sgl[i]; + struct pds_lm_sg_elem *sg_elem = ®ion->sgl[i]; sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); sg_elem->len = cpu_to_le32(sg_dma_len(sg)); @@ -365,15 +363,15 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); - dma_sync_single_for_device(pdsc_dev, dirty->sgl_addr, size, dma_dir); - err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, dirty->sgl_addr, num_sge, + dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); + err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge, offset, bmp_bytes, read_seq); if (err) dev_err(&pdev->dev, "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", bmp_type_str, offset, bmp_bytes, - num_sge, dirty->sgl_addr, ERR_PTR(err)); - dma_sync_single_for_cpu(pdsc_dev, dirty->sgl_addr, size, dma_dir); + num_sge, region->sgl_addr, ERR_PTR(err)); + dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir); dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); out_free_sg_table: @@ -387,14 +385,18 @@ out_free_pages: static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack, + struct pds_vfio_region *region = &pds_vfio->dirty.region; + + return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_ack, offset, len, WRITE_ACK); } static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq, + struct pds_vfio_region *region = &pds_vfio->dirty.region; + + return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_seq, offset, len, READ_SEQ); } @@ -402,15 +404,15 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, struct iova_bitmap *dirty_bitmap, u32 bmp_offset, u32 len_bytes) { - u64 page_size = pds_vfio->dirty.region_page_size; - u64 region_start = pds_vfio->dirty.region_start; + u64 page_size = pds_vfio->dirty.region.page_size; + u64 region_start = pds_vfio->dirty.region.start; u32 bmp_offset_bit; __le64 *seq, *ack; int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)pds_vfio->dirty.region.host_seq.bmp + bmp_offset); + ack = (__le64 *)((u64)pds_vfio->dirty.region.host_ack.bmp + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -451,25 +453,24 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size); + pages = DIV_ROUND_UP(length, pds_vfio->dirty.region.page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; dev_dbg(dev, "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", - pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, + pds_vfio->vf_id, iova, length, pds_vfio->dirty.region.page_size, pages, bitmap_size); - if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) { + if (!length || ((iova - dirty->region.start + length) > dirty->region.size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; } /* bitmap is modified in 64 bit chunks */ - bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size, - sizeof(u64)), - sizeof(u64)); + bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region.page_size, + sizeof(u64)), sizeof(u64)); if (bmp_bytes != bitmap_size) { dev_err(dev, "Calculated bitmap bytes %llu not equal to bitmap size %llu\n", @@ -477,8 +478,8 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) / - dirty->region_page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - dirty->region.start) / + dirty->region.page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index 9de5aac58190..07662d369e7c 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -9,15 +9,19 @@ struct pds_vfio_bmp_info { u32 bmp_bytes; }; -struct pds_vfio_dirty { +struct pds_vfio_region { struct pds_vfio_bmp_info host_seq; struct pds_vfio_bmp_info host_ack; - u64 region_size; - u64 region_start; - u64 region_page_size; + u64 size; + u64 start; + u64 page_size; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; u16 num_sge; +}; + +struct pds_vfio_dirty { + struct pds_vfio_region region; bool is_enabled; }; -- cgit v1.2.3 From 87bdf9807ed70f5d3a0f852f9d04ceb1c99ad9f6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:05 -0800 Subject: vfio/pds: Pass region info to relevant functions A later patch in the series implements multi-region support. That will require specific regions to be passed to relevant functions. Prepare for that change by passing the region structure to relevant functions. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-5-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 71 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 35 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 8a7f9eed4e59..09fed7c1771a 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -100,35 +100,35 @@ static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, - struct pds_vfio_dirty *dirty) + struct pds_vfio_region *region) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - dma_unmap_single(pdsc_dev, dirty->region.sgl_addr, - dirty->region.num_sge * sizeof(struct pds_lm_sg_elem), + dma_unmap_single(pdsc_dev, region->sgl_addr, + region->num_sge * sizeof(struct pds_lm_sg_elem), DMA_BIDIRECTIONAL); - kfree(dirty->region.sgl); + kfree(region->sgl); - dirty->region.num_sge = 0; - dirty->region.sgl = NULL; - dirty->region.sgl_addr = 0; + region->num_sge = 0; + region->sgl = NULL; + region->sgl_addr = 0; } static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_vfio_region *region = &pds_vfio->dirty.region; - if (dirty->region.sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, dirty); + if (region->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, region); } static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 page_count) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; size_t sgl_size; @@ -147,9 +147,9 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return -EIO; } - dirty->region.sgl = sgl; - dirty->region.num_sge = max_sge; - dirty->region.sgl_addr = sgl_addr; + region->sgl = sgl; + region->num_sge = max_sge; + region->sgl_addr = sgl_addr; return 0; } @@ -260,7 +260,7 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_region_info; } - err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count); + err = pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->region, page_count); if (err) { dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", ERR_PTR(err)); @@ -300,11 +300,11 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) } static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, struct pds_vfio_bmp_info *bmp_info, u32 offset, u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; - struct pds_vfio_region *region = &pds_vfio->dirty.region; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; @@ -383,36 +383,36 @@ out_free_pages: } static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; - return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_ack, + return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_ack, offset, len, WRITE_ACK); } static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, u32 offset, u32 len) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; - - return pds_vfio_dirty_seq_ack(pds_vfio, ®ion->host_seq, + return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_seq, offset, len, READ_SEQ); } static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, + struct pds_vfio_region *region, struct iova_bitmap *dirty_bitmap, u32 bmp_offset, u32 len_bytes) { - u64 page_size = pds_vfio->dirty.region.page_size; - u64 region_start = pds_vfio->dirty.region.start; + u64 page_size = region->page_size; + u64 region_start = region->start; u32 bmp_offset_bit; __le64 *seq, *ack; int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)pds_vfio->dirty.region.host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)pds_vfio->dirty.region.host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)region->host_seq.bmp + bmp_offset); + ack = (__le64 *)((u64)region->host_ack.bmp + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -441,6 +441,7 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, { struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + struct pds_vfio_region *region = &dirty->region; u64 bmp_offset, bmp_bytes; u64 bitmap_size, pages; int err; @@ -453,23 +454,23 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - pages = DIV_ROUND_UP(length, pds_vfio->dirty.region.page_size); + pages = DIV_ROUND_UP(length, region->page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; dev_dbg(dev, "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", - pds_vfio->vf_id, iova, length, pds_vfio->dirty.region.page_size, + pds_vfio->vf_id, iova, length, region->page_size, pages, bitmap_size); - if (!length || ((iova - dirty->region.start + length) > dirty->region.size)) { + if (!length || ((iova - region->start + length) > region->size)) { dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", iova, length); return -EINVAL; } /* bitmap is modified in 64 bit chunks */ - bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region.page_size, + bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size, sizeof(u64)), sizeof(u64)); if (bmp_bytes != bitmap_size) { dev_err(dev, @@ -478,23 +479,23 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } - bmp_offset = DIV_ROUND_UP((iova - dirty->region.start) / - dirty->region.page_size, sizeof(u64)); + bmp_offset = DIV_ROUND_UP((iova - region->start) / + region->page_size, sizeof(u64)); dev_dbg(dev, "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", iova, length, bmp_offset, bmp_bytes); - err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset, - bmp_bytes); + err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap, + bmp_offset, bmp_bytes); if (err) return err; - err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes); + err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes); if (err) return err; -- cgit v1.2.3 From 0c320f223ee6892a413428051f35bebf85fe83d3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:06 -0800 Subject: vfio/pds: Move seq/ack bitmaps into region struct Since the host seq/ack bitmaps are part of a region move them into struct pds_vfio_region. Also, make use of the bmp_bytes value for validation purposes. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-6-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 35 ++++++++++++++++++++++------------- drivers/vfio/pci/pds/dirty.h | 10 +++------- 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 09fed7c1771a..4824cdfe01ed 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -85,18 +85,20 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->region.host_seq.bmp = host_seq_bmp; - dirty->region.host_ack.bmp = host_ack_bmp; + dirty->region.host_seq = host_seq_bmp; + dirty->region.host_ack = host_ack_bmp; + dirty->region.bmp_bytes = bytes; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->region.host_seq.bmp); - vfree(dirty->region.host_ack.bmp); - dirty->region.host_seq.bmp = NULL; - dirty->region.host_ack.bmp = NULL; + vfree(dirty->region.host_seq); + vfree(dirty->region.host_ack); + dirty->region.host_seq = NULL; + dirty->region.host_ack = NULL; + dirty->region.bmp_bytes = 0; } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -301,8 +303,8 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, struct pds_vfio_region *region, - struct pds_vfio_bmp_info *bmp_info, - u32 offset, u32 bmp_bytes, bool read_seq) + unsigned long *seq_ack_bmp, u32 offset, + u32 bmp_bytes, bool read_seq) { const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; @@ -319,7 +321,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, int err; int i; - bmp = (void *)((u64)bmp_info->bmp + offset); + bmp = (void *)((u64)seq_ack_bmp + offset); page_offset = offset_in_page(bmp); bmp -= page_offset; @@ -387,7 +389,7 @@ static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_ack, + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack, offset, len, WRITE_ACK); } @@ -395,7 +397,7 @@ static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, struct pds_vfio_region *region, u32 offset, u32 len) { - return pds_vfio_dirty_seq_ack(pds_vfio, region, ®ion->host_seq, + return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq, offset, len, READ_SEQ); } @@ -411,8 +413,8 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, int dword_count; dword_count = len_bytes / sizeof(u64); - seq = (__le64 *)((u64)region->host_seq.bmp + bmp_offset); - ack = (__le64 *)((u64)region->host_ack.bmp + bmp_offset); + seq = (__le64 *)((u64)region->host_seq + bmp_offset); + ack = (__le64 *)((u64)region->host_ack + bmp_offset); bmp_offset_bit = bmp_offset * 8; for (int i = 0; i < dword_count; i++) { @@ -479,6 +481,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } + if (bmp_bytes > region->bmp_bytes) { + dev_err(dev, + "Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n", + bmp_bytes, region->bmp_bytes); + return -EINVAL; + } + bmp_offset = DIV_ROUND_UP((iova - region->start) / region->page_size, sizeof(u64)); diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index 07662d369e7c..a1f6d894f913 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -4,14 +4,10 @@ #ifndef _DIRTY_H_ #define _DIRTY_H_ -struct pds_vfio_bmp_info { - unsigned long *bmp; - u32 bmp_bytes; -}; - struct pds_vfio_region { - struct pds_vfio_bmp_info host_seq; - struct pds_vfio_bmp_info host_ack; + unsigned long *host_seq; + unsigned long *host_ack; + u64 bmp_bytes; u64 size; u64 start; u64 page_size; -- cgit v1.2.3 From 2e7c6feb4ef5257189819359e754625ccf5a1d6c Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 16 Nov 2023 16:12:07 -0800 Subject: vfio/pds: Add multi-region support Only supporting a single region/range is limiting, wasteful, and in some cases broken (i.e. when there are large gaps in the iova memory ranges). Fix this by adding support for multiple regions based on what the device tells the driver it can support. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231117001207.2793-7-brett.creeley@amd.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/pds/dirty.c | 220 ++++++++++++++++++++++++++++++------------- drivers/vfio/pci/pds/dirty.h | 4 +- 2 files changed, 156 insertions(+), 68 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 4824cdfe01ed..8ddf4346fcd5 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -70,7 +70,7 @@ out_free_region_info: kfree(region_info); } -static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, +static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region, unsigned long bytes) { unsigned long *host_seq_bmp, *host_ack_bmp; @@ -85,20 +85,27 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, return -ENOMEM; } - dirty->region.host_seq = host_seq_bmp; - dirty->region.host_ack = host_ack_bmp; - dirty->region.bmp_bytes = bytes; + region->host_seq = host_seq_bmp; + region->host_ack = host_ack_bmp; + region->bmp_bytes = bytes; return 0; } static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) { - vfree(dirty->region.host_seq); - vfree(dirty->region.host_ack); - dirty->region.host_seq = NULL; - dirty->region.host_ack = NULL; - dirty->region.bmp_bytes = 0; + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + vfree(region->host_seq); + vfree(region->host_ack); + region->host_seq = NULL; + region->host_ack = NULL; + region->bmp_bytes = 0; + } } static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -119,10 +126,17 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) { - struct pds_vfio_region *region = &pds_vfio->dirty.region; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - if (region->sgl) - __pds_vfio_dirty_free_sgl(pds_vfio, region); + if (!dirty->regions) + return; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (region->sgl) + __pds_vfio_dirty_free_sgl(pds_vfio, region); + } } static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, @@ -156,22 +170,90 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, return 0; } +static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty) +{ + vfree(dirty->regions); + dirty->regions = NULL; + dirty->num_regions = 0; +} + +static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio, + struct pds_lm_dirty_region_info *region_info, + u64 region_page_size, u8 num_regions) +{ + struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + u32 dev_bmp_offset_byte = 0; + int err; + + dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region)); + if (!dirty->regions) + return -ENOMEM; + dirty->num_regions = num_regions; + + for (int i = 0; i < num_regions; i++) { + struct pds_lm_dirty_region_info *ri = ®ion_info[i]; + struct pds_vfio_region *region = &dirty->regions[i]; + u64 region_size, region_start; + u32 page_count; + + /* page_count might be adjusted by the device */ + page_count = le32_to_cpu(ri->page_count); + region_start = le64_to_cpu(ri->dma_base); + region_size = page_count * region_page_size; + + err = pds_vfio_dirty_alloc_bitmaps(region, + page_count / BITS_PER_BYTE); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } + + err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count); + if (err) { + dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", + ERR_PTR(err)); + goto out_free_regions; + } + + region->size = region_size; + region->start = region_start; + region->page_size = region_page_size; + region->dev_bmp_offset_start_byte = dev_bmp_offset_byte; + + dev_bmp_offset_byte += page_count / BITS_PER_BYTE; + if (dev_bmp_offset_byte % BITS_PER_BYTE) { + dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n"); + err = -EINVAL; + goto out_free_regions; + } + } + + return 0; + +out_free_regions: + pds_vfio_dirty_free_bitmaps(dirty); + pds_vfio_dirty_free_sgl(pds_vfio); + pds_vfio_dirty_free_regions(dirty); + + return err; +} + static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, struct rb_root_cached *ranges, u32 nnodes, u64 *page_size) { struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; struct device *pdsc_dev = &pci_physfn(pdev)->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - u64 region_start, region_size, region_page_size; struct pds_lm_dirty_region_info *region_info; struct interval_tree_node *node = NULL; + u64 region_page_size = *page_size; u8 max_regions = 0, num_regions; dma_addr_t regions_dma = 0; u32 num_ranges = nnodes; - u32 page_count; - u16 len; int err; + u16 len; dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n", pds_vfio->vf_id); @@ -198,39 +280,38 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return -EOPNOTSUPP; } - /* - * Only support 1 region for now. If there are any large gaps in the - * VM's address regions, then this would be a waste of memory as we are - * generating 2 bitmaps (ack/seq) from the min address to the max - * address of the VM's address regions. In the future, if we support - * more than one region in the device/driver we can split the bitmaps - * on the largest address region gaps. We can do this split up to the - * max_regions times returned from the dirty_status command. - */ - max_regions = 1; if (num_ranges > max_regions) { vfio_combine_iova_ranges(ranges, nnodes, max_regions); num_ranges = max_regions; } + region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL); + if (!region_info) + return -ENOMEM; + len = num_ranges * sizeof(*region_info); + node = interval_tree_iter_first(ranges, 0, ULONG_MAX); if (!node) return -EINVAL; + for (int i = 0; i < num_ranges; i++) { + struct pds_lm_dirty_region_info *ri = ®ion_info[i]; + u64 region_size = node->last - node->start + 1; + u64 region_start = node->start; + u32 page_count; - region_size = node->last - node->start + 1; - region_start = node->start; - region_page_size = *page_size; + page_count = DIV_ROUND_UP(region_size, region_page_size); - len = sizeof(*region_info); - region_info = kzalloc(len, GFP_KERNEL); - if (!region_info) - return -ENOMEM; + ri->dma_base = cpu_to_le64(region_start); + ri->page_count = cpu_to_le32(page_count); + ri->page_size_log2 = ilog2(region_page_size); - page_count = DIV_ROUND_UP(region_size, region_page_size); + dev_dbg(&pdev->dev, + "region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n", + i, region_start, node->last, region_size, page_count, + region_page_size); - region_info->dma_base = cpu_to_le64(region_start); - region_info->page_count = cpu_to_le32(page_count); - region_info->page_size_log2 = ilog2(region_page_size); + node = interval_tree_iter_next(node, 0, ULONG_MAX); + } regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len, DMA_BIDIRECTIONAL); @@ -239,39 +320,20 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, goto out_free_region_info; } - err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions); + err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges); dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL); if (err) goto out_free_region_info; - /* - * page_count might be adjusted by the device, - * update it before freeing region_info DMA - */ - page_count = le32_to_cpu(region_info->page_count); - - dev_dbg(&pdev->dev, - "region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n", - regions_dma, region_start, page_count, - (u8)ilog2(region_page_size)); - - err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE); - if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", - ERR_PTR(err)); - goto out_free_region_info; - } - - err = pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->region, page_count); + err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info, + region_page_size, num_ranges); if (err) { - dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", - ERR_PTR(err)); - goto out_free_bitmaps; + dev_err(&pdev->dev, + "Failed to allocate %d regions for tracking dirty regions: %pe\n", + num_regions, ERR_PTR(err)); + goto out_dirty_disable; } - dirty->region.start = region_start; - dirty->region.size = region_size; - dirty->region.page_size = region_page_size; pds_vfio_dirty_set_enabled(pds_vfio); pds_vfio_print_guest_region_info(pds_vfio, max_regions); @@ -280,8 +342,8 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, return 0; -out_free_bitmaps: - pds_vfio_dirty_free_bitmaps(dirty); +out_dirty_disable: + pds_vfio_dirty_disable_cmd(pds_vfio); out_free_region_info: kfree(region_info); return err; @@ -295,6 +357,7 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) pds_vfio_dirty_disable_cmd(pds_vfio); pds_vfio_dirty_free_sgl(pds_vfio); pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty); + pds_vfio_dirty_free_regions(&pds_vfio->dirty); } if (send_cmd) @@ -365,6 +428,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, num_sge = sg_table.nents; size = num_sge * sizeof(struct pds_lm_sg_elem); + offset += region->dev_bmp_offset_start_byte; dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge, offset, bmp_bytes, read_seq); @@ -437,13 +501,28 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, return 0; } +static struct pds_vfio_region * +pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova) +{ + struct pds_vfio_dirty *dirty = &pds_vfio->dirty; + + for (int i = 0; i < dirty->num_regions; i++) { + struct pds_vfio_region *region = &dirty->regions[i]; + + if (iova >= region->start && + iova < (region->start + region->size)) + return region; + } + + return NULL; +} + static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, struct iova_bitmap *dirty_bitmap, unsigned long iova, unsigned long length) { struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; - struct pds_vfio_dirty *dirty = &pds_vfio->dirty; - struct pds_vfio_region *region = &dirty->region; + struct pds_vfio_region *region; u64 bmp_offset, bmp_bytes; u64 bitmap_size, pages; int err; @@ -456,6 +535,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, return -EINVAL; } + region = pds_vfio_get_region(pds_vfio, iova); + if (!region) { + dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n", + pds_vfio->vf_id, iova, length); + return -EINVAL; + } + pages = DIV_ROUND_UP(length, region->page_size); bitmap_size = round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; diff --git a/drivers/vfio/pci/pds/dirty.h b/drivers/vfio/pci/pds/dirty.h index a1f6d894f913..c8e23018b801 100644 --- a/drivers/vfio/pci/pds/dirty.h +++ b/drivers/vfio/pci/pds/dirty.h @@ -13,11 +13,13 @@ struct pds_vfio_region { u64 page_size; struct pds_lm_sg_elem *sgl; dma_addr_t sgl_addr; + u32 dev_bmp_offset_start_byte; u16 num_sge; }; struct pds_vfio_dirty { - struct pds_vfio_region region; + struct pds_vfio_region *regions; + u8 num_regions; bool is_enabled; }; -- cgit v1.2.3 From 160912fc3d4ae492954c25cd78b90083869f0011 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Nov 2023 20:09:00 +0000 Subject: vfio/type1: account iommu allocations iommu allocations should be accounted in order to allow admins to monitor and limit the amount of iommu memory. Signed-off-by: Pasha Tatashin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231130200900.2320829-1-pasha.tatashin@soleen.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index eacd6ec04de5..b2854d7939ce 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1436,7 +1436,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, npage << PAGE_SHIFT, prot | IOMMU_CACHE, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (ret) goto unwind; @@ -1750,7 +1750,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, size, - dma->prot | IOMMU_CACHE, GFP_KERNEL); + dma->prot | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1845,7 +1846,8 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * continue; ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, + GFP_KERNEL_ACCOUNT); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); -- cgit v1.2.3 From 8bccc5b80678c69f7729ce4cd232c0aa98fa6277 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Dec 2023 11:32:45 +0200 Subject: vfio/pci: Expose vfio_pci_core_setup_barmap() Expose vfio_pci_core_setup_barmap() to be used by drivers. This will let drivers to mmap a BAR and re-use it from both vfio and the driver when it's applicable. This API will be used in the next patches by the vfio/virtio coming driver. Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20231219093247.170936-8-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_rdwr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e27de61ac9fe..a9887fd6de46 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -200,7 +200,7 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, return done; } -static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar) +int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -223,6 +223,7 @@ static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar) return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap); ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -262,7 +263,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, } x_end = end; } else { - int ret = vfio_pci_setup_barmap(vdev, bar); + int ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) { done = ret; goto out; @@ -438,7 +439,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, return -EINVAL; #endif - ret = vfio_pci_setup_barmap(vdev, bar); + ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) return ret; -- cgit v1.2.3 From 8486ae162b3b6cc1055366f044495cf1966231f1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Dec 2023 11:32:46 +0200 Subject: vfio/pci: Expose vfio_pci_core_iowrite/read##size() Expose vfio_pci_core_iowrite/read##size() to let it be used by drivers. This functionality is needed to enable direct access to some physical BAR of the device with the proper locks/checks in place. The next patches from this series will use this functionality on a data path flow when a direct access to the BAR is needed. Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20231219093247.170936-9-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_rdwr.c | 50 +++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index a9887fd6de46..07fea08ea8a2 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -38,7 +38,7 @@ #define vfio_iowrite8 iowrite8 #define VFIO_IOWRITE(size) \ -static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \ +int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io) \ { \ if (test_mem) { \ @@ -55,7 +55,8 @@ static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \ up_read(&vdev->memory_lock); \ \ return 0; \ -} +} \ +EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size); VFIO_IOWRITE(8) VFIO_IOWRITE(16) @@ -65,7 +66,7 @@ VFIO_IOWRITE(64) #endif #define VFIO_IOREAD(size) \ -static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \ +int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size *val, void __iomem *io) \ { \ if (test_mem) { \ @@ -82,7 +83,8 @@ static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \ up_read(&vdev->memory_lock); \ \ return 0; \ -} +} \ +EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size); VFIO_IOREAD(8) VFIO_IOREAD(16) @@ -119,13 +121,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 4)) return -EFAULT; - ret = vfio_pci_iowrite32(vdev, test_mem, - val, io + off); + ret = vfio_pci_core_iowrite32(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread32(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread32(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -141,13 +143,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 2)) return -EFAULT; - ret = vfio_pci_iowrite16(vdev, test_mem, - val, io + off); + ret = vfio_pci_core_iowrite16(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread16(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread16(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -163,13 +165,13 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, if (copy_from_user(&val, buf, 1)) return -EFAULT; - ret = vfio_pci_iowrite8(vdev, test_mem, - val, io + off); + ret = vfio_pci_core_iowrite8(vdev, test_mem, + val, io + off); if (ret) return ret; } else { - ret = vfio_pci_ioread8(vdev, test_mem, - &val, io + off); + ret = vfio_pci_core_ioread8(vdev, test_mem, + &val, io + off); if (ret) return ret; @@ -364,21 +366,21 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, { switch (ioeventfd->count) { case 1: - vfio_pci_iowrite8(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; case 2: - vfio_pci_iowrite16(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; case 4: - vfio_pci_iowrite32(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #ifdef iowrite64 case 8: - vfio_pci_iowrite64(ioeventfd->vdev, test_mem, - ioeventfd->data, ioeventfd->addr); + vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem, + ioeventfd->data, ioeventfd->addr); break; #endif } -- cgit v1.2.3 From eb61eca0e8c3efbdd6d3c9d2b7f70bf67e165f7d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 Dec 2023 11:32:47 +0200 Subject: vfio/virtio: Introduce a vfio driver over virtio devices Introduce a vfio driver over virtio devices to support the legacy interface functionality for VFs. Background, from the virtio spec [1]. -------------------------------------------------------------------- In some systems, there is a need to support a virtio legacy driver with a device that does not directly support the legacy interface. In such scenarios, a group owner device can provide the legacy interface functionality for the group member devices. The driver of the owner device can then access the legacy interface of a member device on behalf of the legacy member device driver. For example, with the SR-IOV group type, group members (VFs) can not present the legacy interface in an I/O BAR in BAR0 as expected by the legacy pci driver. If the legacy driver is running inside a virtual machine, the hypervisor executing the virtual machine can present a virtual device with an I/O BAR in BAR0. The hypervisor intercepts the legacy driver accesses to this I/O BAR and forwards them to the group owner device (PF) using group administration commands. -------------------------------------------------------------------- Specifically, this driver adds support for a virtio-net VF to be exposed as a transitional device to a guest driver and allows the legacy IO BAR functionality on top. This allows a VM which uses a legacy virtio-net driver in the guest to work transparently over a VF which its driver in the host is that new driver. The driver can be extended easily to support some other types of virtio devices (e.g virtio-blk), by adding in a few places the specific type properties as was done for virtio-net. For now, only the virtio-net use case was tested and as such we introduce the support only for such a device. Practically, Upon probing a VF for a virtio-net device, in case its PF supports legacy access over the virtio admin commands and the VF doesn't have BAR 0, we set some specific 'vfio_device_ops' to be able to simulate in SW a transitional device with I/O BAR in BAR 0. The existence of the simulated I/O bar is reported later on by overwriting the VFIO_DEVICE_GET_REGION_INFO command and the device exposes itself as a transitional device by overwriting some properties upon reading its config space. Once we report the existence of I/O BAR as BAR 0 a legacy driver in the guest may use it via read/write calls according to the virtio specification. Any read/write towards the control parts of the BAR will be captured by the new driver and will be translated into admin commands towards the device. In addition, any data path read/write access (i.e. virtio driver notifications) will be captured by the driver and forwarded to the physical BAR which its properties were supplied by the admin command VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO upon the probing/init flow. With that code in place a legacy driver in the guest has the look and feel as if having a transitional device with legacy support for both its control and data path flows. [1] https://github.com/oasis-tcs/virtio-spec/commit/03c2d32e5093ca9f2a17797242fbef88efe94b8c Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20231219093247.170936-10-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 2 + drivers/vfio/pci/Makefile | 2 + drivers/vfio/pci/virtio/Kconfig | 15 + drivers/vfio/pci/virtio/Makefile | 3 + drivers/vfio/pci/virtio/main.c | 576 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 598 insertions(+) create mode 100644 drivers/vfio/pci/virtio/Kconfig create mode 100644 drivers/vfio/pci/virtio/Makefile create mode 100644 drivers/vfio/pci/virtio/main.c (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 8125e5f37832..18c397df566d 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -65,4 +65,6 @@ source "drivers/vfio/pci/hisilicon/Kconfig" source "drivers/vfio/pci/pds/Kconfig" +source "drivers/vfio/pci/virtio/Kconfig" + endmenu diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 45167be462d8..046139a4eca5 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -13,3 +13,5 @@ obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/ obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/ obj-$(CONFIG_PDS_VFIO_PCI) += pds/ + +obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/ diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig new file mode 100644 index 000000000000..fc3a0be9d8d4 --- /dev/null +++ b/drivers/vfio/pci/virtio/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +config VIRTIO_VFIO_PCI + tristate "VFIO support for VIRTIO NET PCI devices" + depends on VIRTIO_PCI_ADMIN_LEGACY + select VFIO_PCI_CORE + help + This provides support for exposing VIRTIO NET VF devices which support + legacy IO access, using the VFIO framework that can work with a legacy + virtio driver in the guest. + Based on PCIe spec, VFs do not support I/O Space. + As of that this driver emulates I/O BAR in software to let a VF be + seen as a transitional device by its users and let it work with + a legacy driver. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/virtio/Makefile b/drivers/vfio/pci/virtio/Makefile new file mode 100644 index 000000000000..7171105baf33 --- /dev/null +++ b/drivers/vfio/pci/virtio/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio-vfio-pci.o +virtio-vfio-pci-y := main.o diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c new file mode 100644 index 000000000000..291c55b641f1 --- /dev/null +++ b/drivers/vfio/pci/virtio/main.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct virtiovf_pci_core_device { + struct vfio_pci_core_device core_device; + u8 *bar0_virtual_buf; + /* synchronize access to the virtual buf */ + struct mutex bar_mutex; + void __iomem *notify_addr; + u64 notify_offset; + __le32 pci_base_addr_0; + __le16 pci_cmd; + u8 bar0_virtual_buf_size; + u8 notify_bar; +}; + +static int +virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev, + loff_t pos, char __user *buf, + size_t count, bool read) +{ + bool msix_enabled = + (virtvdev->core_device.irq_type == VFIO_PCI_MSIX_IRQ_INDEX); + struct pci_dev *pdev = virtvdev->core_device.pdev; + u8 *bar0_buf = virtvdev->bar0_virtual_buf; + bool common; + u8 offset; + int ret; + + common = pos < VIRTIO_PCI_CONFIG_OFF(msix_enabled); + /* offset within the relevant configuration area */ + offset = common ? pos : pos - VIRTIO_PCI_CONFIG_OFF(msix_enabled); + mutex_lock(&virtvdev->bar_mutex); + if (read) { + if (common) + ret = virtio_pci_admin_legacy_common_io_read(pdev, offset, + count, bar0_buf + pos); + else + ret = virtio_pci_admin_legacy_device_io_read(pdev, offset, + count, bar0_buf + pos); + if (ret) + goto out; + if (copy_to_user(buf, bar0_buf + pos, count)) + ret = -EFAULT; + } else { + if (copy_from_user(bar0_buf + pos, buf, count)) { + ret = -EFAULT; + goto out; + } + + if (common) + ret = virtio_pci_admin_legacy_common_io_write(pdev, offset, + count, bar0_buf + pos); + else + ret = virtio_pci_admin_legacy_device_io_write(pdev, offset, + count, bar0_buf + pos); + } +out: + mutex_unlock(&virtvdev->bar_mutex); + return ret; +} + +static int +virtiovf_pci_bar0_rw(struct virtiovf_pci_core_device *virtvdev, + loff_t pos, char __user *buf, + size_t count, bool read) +{ + struct vfio_pci_core_device *core_device = &virtvdev->core_device; + struct pci_dev *pdev = core_device->pdev; + u16 queue_notify; + int ret; + + if (!(le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO)) + return -EIO; + + if (pos + count > virtvdev->bar0_virtual_buf_size) + return -EINVAL; + + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) { + pci_info_ratelimited(pdev, "runtime resume failed %d\n", ret); + return -EIO; + } + + switch (pos) { + case VIRTIO_PCI_QUEUE_NOTIFY: + if (count != sizeof(queue_notify)) { + ret = -EINVAL; + goto end; + } + if (read) { + ret = vfio_pci_core_ioread16(core_device, true, &queue_notify, + virtvdev->notify_addr); + if (ret) + goto end; + if (copy_to_user(buf, &queue_notify, + sizeof(queue_notify))) { + ret = -EFAULT; + goto end; + } + } else { + if (copy_from_user(&queue_notify, buf, count)) { + ret = -EFAULT; + goto end; + } + ret = vfio_pci_core_iowrite16(core_device, true, queue_notify, + virtvdev->notify_addr); + } + break; + default: + ret = virtiovf_issue_legacy_rw_cmd(virtvdev, pos, buf, count, + read); + } + +end: + pm_runtime_put(&pdev->dev); + return ret ? ret : count; +} + +static bool range_intersect_range(loff_t range1_start, size_t count1, + loff_t range2_start, size_t count2, + loff_t *start_offset, + size_t *intersect_count, + size_t *register_offset) +{ + if (range1_start <= range2_start && + range1_start + count1 > range2_start) { + *start_offset = range2_start - range1_start; + *intersect_count = min_t(size_t, count2, + range1_start + count1 - range2_start); + *register_offset = 0; + return true; + } + + if (range1_start > range2_start && + range1_start < range2_start + count2) { + *start_offset = 0; + *intersect_count = min_t(size_t, count1, + range2_start + count2 - range1_start); + *register_offset = range1_start - range2_start; + return true; + } + + return false; +} + +static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev, + char __user *buf, size_t count, + loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + size_t register_offset; + loff_t copy_offset; + size_t copy_count; + __le32 val32; + __le16 val16; + u8 val8; + int ret; + + ret = vfio_pci_core_read(core_vdev, buf, count, ppos); + if (ret < 0) + return ret; + + if (range_intersect_range(pos, count, PCI_DEVICE_ID, sizeof(val16), + ©_offset, ©_count, ®ister_offset)) { + val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count)) + return -EFAULT; + } + + if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) && + range_intersect_range(pos, count, PCI_COMMAND, sizeof(val16), + ©_offset, ©_count, ®ister_offset)) { + if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset, + copy_count)) + return -EFAULT; + val16 |= cpu_to_le16(PCI_COMMAND_IO); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_REVISION_ID, sizeof(val8), + ©_offset, ©_count, ®ister_offset)) { + /* Transional needs to have revision 0 */ + val8 = 0; + if (copy_to_user(buf + copy_offset, &val8, copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, sizeof(val32), + ©_offset, ©_count, ®ister_offset)) { + u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1); + u32 pci_base_addr_0 = le32_to_cpu(virtvdev->pci_base_addr_0); + + val32 = cpu_to_le32((pci_base_addr_0 & bar_mask) | PCI_BASE_ADDRESS_SPACE_IO); + if (copy_to_user(buf + copy_offset, (void *)&val32 + register_offset, copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_SUBSYSTEM_ID, sizeof(val16), + ©_offset, ©_count, ®ister_offset)) { + /* + * Transitional devices use the PCI subsystem device id as + * virtio device id, same as legacy driver always did. + */ + val16 = cpu_to_le16(VIRTIO_ID_NET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID, sizeof(val16), + ©_offset, ©_count, ®ister_offset)) { + val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET); + if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, + copy_count)) + return -EFAULT; + } + + return count; +} + +static ssize_t +virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf, + size_t count, loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (!count) + return 0; + + if (index == VFIO_PCI_CONFIG_REGION_INDEX) + return virtiovf_pci_read_config(core_vdev, buf, count, ppos); + + if (index == VFIO_PCI_BAR0_REGION_INDEX) + return virtiovf_pci_bar0_rw(virtvdev, pos, buf, count, true); + + return vfio_pci_core_read(core_vdev, buf, count, ppos); +} + +static ssize_t virtiovf_pci_write_config(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + size_t register_offset; + loff_t copy_offset; + size_t copy_count; + + if (range_intersect_range(pos, count, PCI_COMMAND, sizeof(virtvdev->pci_cmd), + ©_offset, ©_count, + ®ister_offset)) { + if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset, + buf + copy_offset, + copy_count)) + return -EFAULT; + } + + if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, + sizeof(virtvdev->pci_base_addr_0), + ©_offset, ©_count, + ®ister_offset)) { + if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset, + buf + copy_offset, + copy_count)) + return -EFAULT; + } + + return vfio_pci_core_write(core_vdev, buf, count, ppos); +} + +static ssize_t +virtiovf_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (!count) + return 0; + + if (index == VFIO_PCI_CONFIG_REGION_INDEX) + return virtiovf_pci_write_config(core_vdev, buf, count, ppos); + + if (index == VFIO_PCI_BAR0_REGION_INDEX) + return virtiovf_pci_bar0_rw(virtvdev, pos, (char __user *)buf, count, false); + + return vfio_pci_core_write(core_vdev, buf, count, ppos); +} + +static int +virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev, + unsigned int cmd, unsigned long arg) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + unsigned long minsz = offsetofend(struct vfio_region_info, offset); + void __user *uarg = (void __user *)arg; + struct vfio_region_info info = {}; + + if (copy_from_user(&info, uarg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + switch (info.index) { + case VFIO_PCI_BAR0_REGION_INDEX: + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = virtvdev->bar0_virtual_buf_size; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + return copy_to_user(uarg, &info, minsz) ? -EFAULT : 0; + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} + +static long +virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case VFIO_DEVICE_GET_REGION_INFO: + return virtiovf_pci_ioctl_get_region_info(core_vdev, cmd, arg); + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} + +static int +virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev) +{ + struct vfio_pci_core_device *core_device = &virtvdev->core_device; + int ret; + + /* + * Setup the BAR where the 'notify' exists to be used by vfio as well + * This will let us mmap it only once and use it when needed. + */ + ret = vfio_pci_core_setup_barmap(core_device, + virtvdev->notify_bar); + if (ret) + return ret; + + virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] + + virtvdev->notify_offset; + return 0; +} + +static int virtiovf_pci_open_device(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + struct vfio_pci_core_device *vdev = &virtvdev->core_device; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (virtvdev->bar0_virtual_buf) { + /* + * Upon close_device() the vfio_pci_core_disable() is called + * and will close all the previous mmaps, so it seems that the + * valid life cycle for the 'notify' addr is per open/close. + */ + ret = virtiovf_set_notify_addr(virtvdev); + if (ret) { + vfio_pci_core_disable(vdev); + return ret; + } + } + + vfio_pci_core_finish_enable(vdev); + return 0; +} + +static int virtiovf_get_device_config_size(unsigned short device) +{ + /* Network card */ + return offsetofend(struct virtio_net_config, status); +} + +static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev) +{ + u64 offset; + int ret; + u8 bar; + + ret = virtio_pci_admin_legacy_io_notify_info(virtvdev->core_device.pdev, + VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM, + &bar, &offset); + if (ret) + return ret; + + virtvdev->notify_bar = bar; + virtvdev->notify_offset = offset; + return 0; +} + +static int virtiovf_pci_init_device(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + struct pci_dev *pdev; + int ret; + + ret = vfio_pci_core_init_dev(core_vdev); + if (ret) + return ret; + + pdev = virtvdev->core_device.pdev; + ret = virtiovf_read_notify_info(virtvdev); + if (ret) + return ret; + + virtvdev->bar0_virtual_buf_size = VIRTIO_PCI_CONFIG_OFF(true) + + virtiovf_get_device_config_size(pdev->device); + BUILD_BUG_ON(!is_power_of_2(virtvdev->bar0_virtual_buf_size)); + virtvdev->bar0_virtual_buf = kzalloc(virtvdev->bar0_virtual_buf_size, + GFP_KERNEL); + if (!virtvdev->bar0_virtual_buf) + return -ENOMEM; + mutex_init(&virtvdev->bar_mutex); + return 0; +} + +static void virtiovf_pci_core_release_dev(struct vfio_device *core_vdev) +{ + struct virtiovf_pci_core_device *virtvdev = container_of( + core_vdev, struct virtiovf_pci_core_device, core_device.vdev); + + kfree(virtvdev->bar0_virtual_buf); + vfio_pci_core_release_dev(core_vdev); +} + +static const struct vfio_device_ops virtiovf_vfio_pci_tran_ops = { + .name = "virtio-vfio-pci-trans", + .init = virtiovf_pci_init_device, + .release = virtiovf_pci_core_release_dev, + .open_device = virtiovf_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = virtiovf_vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = virtiovf_pci_core_read, + .write = virtiovf_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, +}; + +static const struct vfio_device_ops virtiovf_vfio_pci_ops = { + .name = "virtio-vfio-pci", + .init = vfio_pci_core_init_dev, + .release = vfio_pci_core_release_dev, + .open_device = virtiovf_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, +}; + +static bool virtiovf_bar0_exists(struct pci_dev *pdev) +{ + struct resource *res = pdev->resource; + + return res->flags; +} + +static int virtiovf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + const struct vfio_device_ops *ops = &virtiovf_vfio_pci_ops; + struct virtiovf_pci_core_device *virtvdev; + int ret; + + if (pdev->is_virtfn && virtio_pci_admin_has_legacy_io(pdev) && + !virtiovf_bar0_exists(pdev)) + ops = &virtiovf_vfio_pci_tran_ops; + + virtvdev = vfio_alloc_device(virtiovf_pci_core_device, core_device.vdev, + &pdev->dev, ops); + if (IS_ERR(virtvdev)) + return PTR_ERR(virtvdev); + + dev_set_drvdata(&pdev->dev, &virtvdev->core_device); + ret = vfio_pci_core_register_device(&virtvdev->core_device); + if (ret) + goto out; + return 0; +out: + vfio_put_device(&virtvdev->core_device.vdev); + return ret; +} + +static void virtiovf_pci_remove(struct pci_dev *pdev) +{ + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); + + vfio_pci_core_unregister_device(&virtvdev->core_device); + vfio_put_device(&virtvdev->core_device.vdev); +} + +static const struct pci_device_id virtiovf_pci_table[] = { + /* Only virtio-net is supported/tested so far */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) }, + {} +}; + +MODULE_DEVICE_TABLE(pci, virtiovf_pci_table); + +void virtiovf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); + + virtvdev->pci_cmd = 0; +} + +static const struct pci_error_handlers virtiovf_err_handlers = { + .reset_done = virtiovf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver virtiovf_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = virtiovf_pci_table, + .probe = virtiovf_pci_probe, + .remove = virtiovf_pci_remove, + .err_handler = &virtiovf_err_handlers, + .driver_managed_dma = true, +}; + +module_pci_driver(virtiovf_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yishai Hadas "); +MODULE_DESCRIPTION( + "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices"); -- cgit v1.2.3 From daca194876a94565194f6e32ddb0355af8cf30f7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 20 Dec 2023 10:24:56 +0200 Subject: vfio/virtio: Declare virtiovf_pci_aer_reset_done() static Declare virtiovf_pci_aer_reset_done() as a static function to prevent the below build warning. "warning: no previous prototype for 'virtiovf_pci_aer_reset_done' [-Wmissing-prototypes]" Fixes: eb61eca0e8c3 ("vfio/virtio: Introduce a vfio driver over virtio devices") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20231220143122.63337669@canb.auug.org.au/ Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20231220082456.241973-1-yishaih@nvidia.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312202115.oDmvN1VE-lkp@intel.com/ Signed-off-by: Alex Williamson --- drivers/vfio/pci/virtio/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c index 291c55b641f1..d5af683837d3 100644 --- a/drivers/vfio/pci/virtio/main.c +++ b/drivers/vfio/pci/virtio/main.c @@ -547,7 +547,7 @@ static const struct pci_device_id virtiovf_pci_table[] = { MODULE_DEVICE_TABLE(pci, virtiovf_pci_table); -void virtiovf_pci_aer_reset_done(struct pci_dev *pdev) +static void virtiovf_pci_aer_reset_done(struct pci_dev *pdev) { struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); -- cgit v1.2.3 From be12ad45e15b5ee0e2526a50266ba1d295d26a88 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Mon, 20 Nov 2023 09:14:06 +0000 Subject: hisi_acc_vfio_pci: Update migration data pointer correctly on saving/resume When the optional PRE_COPY support was added to speed up the device compatibility check, it failed to update the saving/resuming data pointers based on the fd offset. This results in migration data corruption and when the device gets started on the destination the following error is reported in some cases, [ 478.907684] arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: [ 478.913691] arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000310200000010 [ 478.919603] arm-smmu-v3 arm-smmu-v3.2.auto: 0x000002088000007f [ 478.925515] arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 [ 478.931425] arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 [ 478.947552] hisi_zip 0000:31:00.0: qm_axi_rresp [error status=0x1] found [ 478.955930] hisi_zip 0000:31:00.0: qm_db_timeout [error status=0x400] found [ 478.955944] hisi_zip 0000:31:00.0: qm sq doorbell timeout in function 2 Fixes: d9a871e4a143 ("hisi_acc_vfio_pci: Introduce support for PRE_COPY state transitions") Signed-off-by: Shameer Kolothum Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231120091406.780-1-shameerali.kolothum.thodi@huawei.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index b2f9778c8366..4d27465c8f1a 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -694,6 +694,7 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu size_t len, loff_t *pos) { struct hisi_acc_vf_migration_file *migf = filp->private_data; + u8 *vf_data = (u8 *)&migf->vf_data; loff_t requested_length; ssize_t done = 0; int ret; @@ -715,7 +716,7 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu goto out_unlock; } - ret = copy_from_user(&migf->vf_data, buf, len); + ret = copy_from_user(vf_data + *pos, buf, len); if (ret) { done = -EFAULT; goto out_unlock; @@ -835,7 +836,9 @@ static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len = min_t(size_t, migf->total_length - *pos, len); if (len) { - ret = copy_to_user(buf, &migf->vf_data, len); + u8 *vf_data = (u8 *)&migf->vf_data; + + ret = copy_to_user(buf, vf_data + *pos, len); if (ret) { done = -EFAULT; goto out_unlock; -- cgit v1.2.3 From 78f70c02bdbccb5e9b0b0c728185d4aeb7044ace Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Jan 2024 08:57:19 +0100 Subject: vfio/virtio: fix virtio-pci dependency The new vfio-virtio driver already has a dependency on VIRTIO_PCI_ADMIN_LEGACY, but that is a bool symbol and allows vfio-virtio to be built-in even if virtio-pci itself is a loadable module. This leads to a link failure: aarch64-linux-ld: drivers/vfio/pci/virtio/main.o: in function `virtiovf_pci_probe': main.c:(.text+0xec): undefined reference to `virtio_pci_admin_has_legacy_io' aarch64-linux-ld: drivers/vfio/pci/virtio/main.o: in function `virtiovf_pci_init_device': main.c:(.text+0x260): undefined reference to `virtio_pci_admin_legacy_io_notify_info' aarch64-linux-ld: drivers/vfio/pci/virtio/main.o: in function `virtiovf_pci_bar0_rw': main.c:(.text+0x6ec): undefined reference to `virtio_pci_admin_legacy_common_io_read' aarch64-linux-ld: main.c:(.text+0x6f4): undefined reference to `virtio_pci_admin_legacy_device_io_read' aarch64-linux-ld: main.c:(.text+0x7f0): undefined reference to `virtio_pci_admin_legacy_common_io_write' aarch64-linux-ld: main.c:(.text+0x7f8): undefined reference to `virtio_pci_admin_legacy_device_io_write' Add another explicit dependency on the tristate symbol. Fixes: eb61eca0e8c3 ("vfio/virtio: Introduce a vfio driver over virtio devices") Signed-off-by: Arnd Bergmann Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240109075731.2726731-1-arnd@kernel.org Signed-off-by: Alex Williamson --- drivers/vfio/pci/virtio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig index fc3a0be9d8d4..bd80eca4a196 100644 --- a/drivers/vfio/pci/virtio/Kconfig +++ b/drivers/vfio/pci/virtio/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VIRTIO_VFIO_PCI tristate "VFIO support for VIRTIO NET PCI devices" - depends on VIRTIO_PCI_ADMIN_LEGACY + depends on VIRTIO_PCI && VIRTIO_PCI_ADMIN_LEGACY select VFIO_PCI_CORE help This provides support for exposing VIRTIO NET VF devices which support -- cgit v1.2.3