From 3ccefdea226ba3f3b69f9e868d2b1c9995b56615 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 3 Mar 2023 14:03:32 +0900 Subject: dma-buf: system_heap: avoid reclaim for order 4 Using order 4 pages would be helpful for IOMMU mappings, but trying to get order 4 pages can spend quite a lot of time in the page allocator. From the perspective of responsiveness, deterministic memory allocation speed is, I think, quite important. An order 4 allocation with __GFP_RECLAIM may spend much time in reclaim and compaction logic; __GFP_NORETRY may also add to this. These cause unpredictable delays. To get reasonable allocation speed from the dma-buf system heap, use HIGH_ORDER_GFP for order 4 to avoid reclaim, and remove the meaningless __GFP_COMP for order 0. According to my tests, order 4 with MID_ORDER_GFP can get a larger number of order 4 pages, but the elapsed times can be very long:

        time  order 8  order 4  order 0
    584 usec        0      160        0
 28,428 usec        0      160        0
100,701 usec        0      160        0
 76,645 usec        0      160        0
 25,522 usec        0      160        0
 38,798 usec        0      160        0
 89,012 usec        0      160        0
 23,015 usec        0      160        0
 73,360 usec        0      160        0
 76,953 usec        0      160        0
 31,492 usec        0      160        0
 75,889 usec        0      160        0
 84,551 usec        0      160        0
 84,352 usec        0      160        0
 57,103 usec        0      160        0
 93,452 usec        0      160        0

If HIGH_ORDER_GFP is used for order 4, the number of order 4 pages may decrease, but the elapsed times were quite stable and fast enough:

        time  order 8  order 4  order 0
  1,356 usec        0      155       80
  1,901 usec        0       11     2384
  1,912 usec        0        0     2560
  1,911 usec        0        0     2560
  1,884 usec        0        0     2560
  1,577 usec        0        0     2560
  1,366 usec        0        0     2560
  1,711 usec        0        0     2560
  1,635 usec        0       28     2112
    544 usec       10        0        0
    633 usec        2      128        0
    848 usec        0      160        0
    729 usec        0      160        0
  1,000 usec        0      160        0
  1,358 usec        0      160        0
  2,638 usec        0       31     2064

Link: https://lkml.kernel.org/r/20230303050332.10138-1-jaewon31.kim@samsung.com Signed-off-by: Jaewon Kim Reviewed-by: John Stultz Cc: Daniel Vetter Cc: Johannes Weiner Cc: Michal Hocko Cc: Sumit Semwal Cc: T.J. Mercier Signed-off-by: Andrew Morton --- drivers/dma-buf/heaps/system_heap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index e8bd10e60998..920db302a273 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -41,12 +41,11 @@ struct dma_heap_attachment { bool mapped; }; -#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP) -#define MID_ORDER_GFP (LOW_ORDER_GFP | __GFP_NOWARN) +#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ | __GFP_COMP) -static gfp_t order_flags[] = {HIGH_ORDER_GFP, MID_ORDER_GFP, LOW_ORDER_GFP}; +static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; /* * The selection of the orders used for allocation (1MB, 64K, 4K) is designed * to match with the sizes often found in IOMMUs. Using order 4 pages instead -- cgit v1.2.3 From cf2e309ebca7bb0916771839f9b580b06c778530 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 13 Mar 2023 19:28:19 +0800 Subject: mm: shrinkers: convert shrinker_rwsem to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now there are no readers of shrinker_rwsem, so we can simply replace it with a mutex.
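For reference, the conversion is the standard rwsem-to-mutex substitution; a minimal sketch of the pattern (the real lock sites live in mm/vmscan.c and are outside this drivers-only excerpt):

	/* before: every remaining user takes the lock for writing */
	static DECLARE_RWSEM(shrinker_rwsem);

	down_write(&shrinker_rwsem);
	/* ... register or unregister a shrinker ... */
	up_write(&shrinker_rwsem);

	/* after: a plain mutex expresses the same mutual exclusion */
	static DEFINE_MUTEX(shrinker_mutex);

	mutex_lock(&shrinker_mutex);
	/* ... register or unregister a shrinker ... */
	mutex_unlock(&shrinker_mutex);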
Link: https://lkml.kernel.org/r/20230313112819.38938-9-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Vlastimil Babka Acked-by: Kirill Tkhai Acked-by: Roman Gushchin Cc: Christian König Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Paul E. McKenney Cc: Shakeel Butt Cc: Sultan Alsawaf Cc: Tetsuo Handa Cc: Yang Shi Signed-off-by: Andrew Morton --- drivers/md/dm-cache-metadata.c | 2 +- drivers/md/dm-thin-metadata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index acffed750e3e..9e0c69958587 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs cmd root_lock). - * - must take shrinker_rwsem without holding cmd->root_lock + * - must take shrinker_mutex without holding cmd->root_lock */ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_MAX_CONCURRENT_LOCKS); diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index fd464fb024c3..9f5cb52c5763 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1887,7 +1887,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs pmd root_lock). - * - must take shrinker_rwsem without holding pmd->root_lock + * - must take shrinker_mutex without holding pmd->root_lock */ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, THIN_MAX_CONCURRENT_LOCKS); -- cgit v1.2.3 From 5f29298fba1ad1e7139381e19e09c272db337b13 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:26 +0300 Subject: floppy: fix MAX_ORDER usage MAX_ORDER is not inclusive: the maximum allocation order buddy allocator can deliver is MAX_ORDER-1. Fix MAX_ORDER usage in floppy code. Also, allocating a buffer of exactly PAGE_SIZE << (MAX_ORDER - 1) bytes is okay; fix the MAX_LEN check to accept it. Link: https://lkml.kernel.org/r/20230315113133.11326-4-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A.
Shutemov Acked-by: Mike Rapoport (IBM) Cc: Denis Efremov Signed-off-by: Andrew Morton --- drivers/block/floppy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 487840e3564d..90d2dfb6448e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3079,7 +3079,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } -#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) +#define MAX_LEN (1UL << (MAX_ORDER - 1) << PAGE_SHIFT) static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) @@ -3108,7 +3108,7 @@ loop: ptr->resultcode = 0; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { - if (ptr->length <= 0 || ptr->length >= MAX_LEN) + if (ptr->length <= 0 || ptr->length > MAX_LEN) return -EINVAL; ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length); fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length); -- cgit v1.2.3 From 50c00d2c686e52c0dd9b5bddc6727f6c442cbcc0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:27 +0300 Subject: drm/i915: fix MAX_ORDER usage in i915_gem_object_get_pages_internal() MAX_ORDER is not inclusive: the maximum allocation order buddy allocator can deliver is MAX_ORDER-1. Fix MAX_ORDER usage in i915_gem_object_get_pages_internal(). Link: https://lkml.kernel.org/r/20230315113133.11326-5-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Tvrtko Ursulin Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Signed-off-by: Andrew Morton --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 6bc26b4b06b8..eae9e9f6d3bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -36,7 +36,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct sg_table *st; struct scatterlist *sg; unsigned int npages; /* restricted by sg_alloc_table */ - int max_order = MAX_ORDER; + int max_order = MAX_ORDER - 1; unsigned int max_segment; gfp_t gfp; -- cgit v1.2.3 From fd54349ddb61445a8a42459b3dc09237c55e6f78 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:28 +0300 Subject: genwqe: fix MAX_ORDER usage MAX_ORDER is not inclusive: the maximum allocation order buddy allocator can deliver is MAX_ORDER-1. Fix MAX_ORDER usage in genwqe driver. Link: https://lkml.kernel.org/r/20230315113133.11326-6-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. 
Shutemov Cc: Frank Haverkamp Signed-off-by: Andrew Morton --- drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/genwqe/card_utils.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 55fc5b80e649..d0e27438a73c 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -443,7 +443,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) if (vsize == 0) return -EINVAL; - if (get_order(vsize) > MAX_ORDER) + if (get_order(vsize) >= MAX_ORDER) return -ENOMEM; dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index f778e11237a6..ac29698d085a 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -308,7 +308,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, sgl->write = write; sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); - if (get_order(sgl->sgl_size) > MAX_ORDER) { + if (get_order(sgl->sgl_size) >= MAX_ORDER) { dev_err(&pci_dev->dev, "[%s] err: too much memory requested!\n", __func__); return ret; -- cgit v1.2.3 From 61883d3c32418f16e35e030ca0cfd5d2de95a649 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:32 +0300 Subject: iommu: fix MAX_ORDER usage in __iommu_dma_alloc_pages() MAX_ORDER is not inclusive: the maximum allocation order buddy allocator can deliver is MAX_ORDER-1. Fix MAX_ORDER usage in __iommu_dma_alloc_pages(). Also use GENMASK() instead of the hard-to-read "(2U << order) - 1" magic. Link: https://lkml.kernel.org/r/20230315113133.11326-10-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Robin Murphy Reviewed-by: Jacob Pan Acked-by: Joerg Roedel Signed-off-by: Andrew Morton --- drivers/iommu/dma-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 99b2646cb5c7..ac996fd6bd9c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -736,7 +736,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, struct page **pages; unsigned int i = 0, nid = dev_to_node(dev); - order_mask &= (2U << MAX_ORDER) - 1; + order_mask &= GENMASK(MAX_ORDER - 1, 0); if (!order_mask) return NULL; @@ -756,7 +756,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * than a necessity, hence using __GFP_NORETRY until * falling back to minimum-order allocations. */ - for (order_mask &= (2U << __fls(count)) - 1; + for (order_mask &= GENMASK(__fls(count), 0); order_mask; order_mask &= ~order_size) { unsigned int order = __fls(order_mask); gfp_t alloc_flags = gfp; -- cgit v1.2.3 From 23baf831a32c04f9a968812511540b1b3e648bf5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Mar 2023 14:31:33 +0300 Subject: mm, treewide: redefine MAX_ORDER sanely MAX_ORDER is currently defined as the number of orders the page allocator supports: a user can ask the buddy allocator for page orders between 0 and MAX_ORDER-1. This definition is counter-intuitive and has led to a number of bugs all over the kernel. Change the definition of MAX_ORDER to be inclusive: the range of orders a user can ask from the buddy allocator is now 0..MAX_ORDER.
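The mechanical consequence shows up as the same before/after pattern throughout the diff below; a minimal sketch (init_pool() is a purely illustrative stand-in for whatever per-order setup a given driver does):

	/* old, exclusive convention: order MAX_ORDER itself was invalid */
	for (order = 0; order < MAX_ORDER; order++)
		init_pool(order);
	max_bytes = PAGE_SIZE << (MAX_ORDER - 1);

	/* new, inclusive convention: MAX_ORDER is the largest valid order */
	for (order = 0; order <= MAX_ORDER; order++)
		init_pool(order);
	max_bytes = PAGE_SIZE << MAX_ORDER;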
[kirill@shutemov.name: fix min() warning] Link: https://lkml.kernel.org/r/20230315153800.32wib3n5rickolvh@box [akpm@linux-foundation.org: fix another min_t warning] [kirill@shutemov.name: fixups per Zi Yan] Link: https://lkml.kernel.org/r/20230316232144.b7ic4cif4kjiabws@box.shutemov.name [akpm@linux-foundation.org: fix underlining in docs] Link: https://lore.kernel.org/oe-kbuild-all/202303191025.VRCTk6mP-lkp@intel.com/ Link: https://lkml.kernel.org/r/20230315113133.11326-11-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Michael Ellerman [powerpc] Cc: "Kirill A. Shutemov" Cc: Zi Yan Signed-off-by: Andrew Morton --- drivers/base/regmap/regmap-debugfs.c | 8 ++++---- drivers/block/floppy.c | 2 +- drivers/crypto/ccp/sev-dev.c | 2 +- drivers/crypto/hisilicon/sgl.c | 6 +++--- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/ttm/ttm_pool.c | 22 +++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- drivers/iommu/dma-iommu.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/genwqe/card_utils.c | 4 ++-- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/ibm/ibmvnic.h | 2 +- drivers/video/fbdev/hyperv_fb.c | 4 ++-- drivers/video/fbdev/vermilion/vermilion.c | 2 +- drivers/virtio/virtio_balloon.c | 2 +- drivers/virtio/virtio_mem.c | 12 ++++++------ 19 files changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 817eda2075aa..c491fabe3617 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -226,8 +226,8 @@ static ssize_t regmap_read_debugfs(struct regmap *map, unsigned int from, if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << (MAX_ORDER - 1))) - count = PAGE_SIZE << (MAX_ORDER - 1); + if (count > (PAGE_SIZE << MAX_ORDER)) + count = PAGE_SIZE << MAX_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) @@ -373,8 +373,8 @@ static ssize_t regmap_reg_ranges_read_file(struct file *file, if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << (MAX_ORDER - 1))) - count = PAGE_SIZE << (MAX_ORDER - 1); + if (count > (PAGE_SIZE << MAX_ORDER)) + count = PAGE_SIZE << MAX_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 90d2dfb6448e..cec2c20f5e59 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3079,7 +3079,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } -#define MAX_LEN (1UL << (MAX_ORDER - 1) << PAGE_SHIFT) +#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e2f25926eb51..bf095baca244 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -886,7 +886,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) /* * The length of the ID shouldn't be assumed by software since * it may change in the future. The allocation size is limited - * to 1 << (PAGE_SHIFT + MAX_ORDER - 1) by the page allocator. + * to 1 << (PAGE_SHIFT + MAX_ORDER) by the page allocator. * If the allocation fails, simply return ENOMEM rather than * warning in the kernel log. 
*/ diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 09586a837b1e..3df7a256e919 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -70,11 +70,11 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, HISI_ACC_SGL_ALIGN_SIZE); /* - * the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1), + * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_ORDER, * block size may exceed 2^31 on ia64, so the max of block size is 2^31 */ - block_size = 1 << (PAGE_SHIFT + MAX_ORDER <= 32 ? - PAGE_SHIFT + MAX_ORDER - 1 : 31); + block_size = 1 << (PAGE_SHIFT + MAX_ORDER < 32 ? + PAGE_SHIFT + MAX_ORDER : 31); sgl_num_per_block = block_size / sgl_size; block_num = count / sgl_num_per_block; remain_sgl = count % sgl_num_per_block; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index eae9e9f6d3bf..6bc26b4b06b8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -36,7 +36,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct sg_table *st; struct scatterlist *sg; unsigned int npages; /* restricted by sg_alloc_table */ - int max_order = MAX_ORDER - 1; + int max_order = MAX_ORDER; unsigned int max_segment; gfp_t gfp; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index defece0bcb81..99f39a5feca1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -115,7 +115,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) do { struct page *page; - GEM_BUG_ON(order >= MAX_ORDER); + GEM_BUG_ON(order > MAX_ORDER); page = alloc_pages(GFP | __GFP_ZERO, order); if (!page) goto err; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index aa116a7bbae3..6c8585abe08d 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -65,11 +65,11 @@ module_param(page_pool_size, ulong, 0644); static atomic_long_t allocated_pages; -static struct ttm_pool_type global_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_uncached[MAX_ORDER]; +static struct ttm_pool_type global_write_combined[MAX_ORDER + 1]; +static struct ttm_pool_type global_uncached[MAX_ORDER + 1]; -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; +static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1]; +static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; @@ -405,7 +405,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + for (order = min_t(unsigned int, MAX_ORDER, __fls(num_pages)); num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; @@ -542,7 +542,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, if (use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + for (j = 0; j <= MAX_ORDER; ++j) ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } @@ -562,7 +562,7 @@ void ttm_pool_fini(struct ttm_pool *pool) if (pool->use_dma_alloc) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < MAX_ORDER; ++j) + 
for (j = 0; j <= MAX_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } @@ -616,7 +616,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) unsigned int i; seq_puts(m, "\t "); - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i <= MAX_ORDER; ++i) seq_printf(m, " ---%2u---", i); seq_puts(m, "\n"); } @@ -627,7 +627,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) + for (i = 0; i <= MAX_ORDER; ++i) seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); seq_puts(m, "\n"); } @@ -736,7 +736,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) spin_lock_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i <= MAX_ORDER; ++i) { ttm_pool_type_init(&global_write_combined[i], NULL, ttm_write_combined, i); ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); @@ -769,7 +769,7 @@ void ttm_pool_mgr_fini(void) { unsigned int i; - for (i = 0; i < MAX_ORDER; ++i) { + for (i = 0; i <= MAX_ORDER; ++i) { ttm_pool_type_fini(&global_write_combined[i]); ttm_pool_type_fini(&global_uncached[i]); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 8d772ea8a583..b574c58a3487 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -182,7 +182,7 @@ #ifdef CONFIG_CMA_ALIGNMENT #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT) #else -#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) +#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER) #endif /* diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index ac996fd6bd9c..7a9f0b0bddbd 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -736,7 +736,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, struct page **pages; unsigned int i = 0, nid = dev_to_node(dev); - order_mask &= GENMASK(MAX_ORDER - 1, 0); + order_mask &= GENMASK(MAX_ORDER, 0); if (!order_mask) return NULL; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 586271b8aa39..85790b870877 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2440,8 +2440,8 @@ static bool its_parse_indirect_baser(struct its_node *its, * feature is not supported by hardware. */ new_order = max_t(u32, get_order(esz << ids), new_order); - if (new_order >= MAX_ORDER) { - new_order = MAX_ORDER - 1; + if (new_order > MAX_ORDER) { + new_order = MAX_ORDER; ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); pr_warn("ITS@%pa: %s Table too large, reduce ids %llu->%u\n", &its->phys_base, its_base_type_string[type], diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cf077f9b30c3..733053c2eaa0 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -408,7 +408,7 @@ static void __cache_size_refresh(void) * If the allocation may fail we use __get_free_pages. Memory fragmentation * won't have a fatal effect here, but it just causes flushes of some other * buffers and more I/O will be performed. Don't use __get_free_pages if it - * always fails (i.e. order >= MAX_ORDER). + * always fails (i.e. order > MAX_ORDER). * * If the allocation shouldn't fail we use __vmalloc. 
This is only for the * initial reserve allocation, so there's no risk of wasting all vmalloc diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index d0e27438a73c..55fc5b80e649 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -443,7 +443,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) if (vsize == 0) return -EINVAL; - if (get_order(vsize) >= MAX_ORDER) + if (get_order(vsize) > MAX_ORDER) return -ENOMEM; dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index ac29698d085a..1c798d6b2dfb 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -210,7 +210,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, dma_addr_t *dma_handle) { - if (get_order(size) >= MAX_ORDER) + if (get_order(size) > MAX_ORDER) return NULL; return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, @@ -308,7 +308,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, sgl->write = write; sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); - if (get_order(sgl->sgl_size) >= MAX_ORDER) { + if (get_order(sgl->sgl_size) > MAX_ORDER) { dev_err(&pci_dev->dev, "[%s] err: too much memory requested!\n", __func__); return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 25be7f8ac7cd..3973ca6adf4c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1041,7 +1041,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) return; order = get_order(alloc_size); - if (order >= MAX_ORDER) { + if (order > MAX_ORDER) { if (net_ratelimit()) dev_warn(ring_to_dev(ring), "failed to allocate tx spare buffer, exceed to max order\n"); return; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index b35c9b6f913b..4e18b4cefa97 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -75,7 +75,7 @@ * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC. 
*/ -#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << MAX_ORDER) * PAGE_SIZE)) #define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) #define IBMVNIC_LTB_SET_SIZE (38 << 20) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index ec3f6cf05f8c..34781dec3856 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -946,7 +946,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, if (request_size == 0) return -1; - if (order < MAX_ORDER) { + if (order <= MAX_ORDER) { /* Call alloc_pages if the size is less than 2^MAX_ORDER */ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!page) @@ -977,7 +977,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, { unsigned int order = get_order(size); - if (order < MAX_ORDER) + if (order <= MAX_ORDER) __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); else dma_free_coherent(&hdev->device, diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 0374ee6b6d03..32e74e02a02f 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -197,7 +197,7 @@ static int vmlfb_alloc_vram(struct vml_info *vinfo, va = &vinfo->vram[i]; order = 0; - while (requested > (PAGE_SIZE << order) && order < MAX_ORDER) + while (requested > (PAGE_SIZE << order) && order <= MAX_ORDER) order++; err = vmlfb_alloc_vram_area(va, order, 0); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 3f78a3a1eb75..5b15936a5214 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -33,7 +33,7 @@ #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ __GFP_NOMEMALLOC) /* The order of free page blocks to report to host */ -#define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1) +#define VIRTIO_BALLOON_HINT_BLOCK_ORDER MAX_ORDER /* The size of a free page block in bytes */ #define VIRTIO_BALLOON_HINT_BLOCK_BYTES \ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 0c2892ec6817..835f6cc2fb66 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1120,13 +1120,13 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, */ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { - unsigned long order = MAX_ORDER - 1; + unsigned long order = MAX_ORDER; unsigned long i; /* * We might get called for ranges that don't cover properly aligned - * MAX_ORDER - 1 pages; however, we can only online properly aligned - * pages with an order of MAX_ORDER - 1 at maximum. + * MAX_ORDER pages; however, we can only online properly aligned + * pages with an order of MAX_ORDER at maximum. */ while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) order--; @@ -1237,9 +1237,9 @@ static void virtio_mem_online_page(struct virtio_mem *vm, bool do_online; /* - * We can get called with any order up to MAX_ORDER - 1. If our - * subblock size is smaller than that and we have a mixture of plugged - * and unplugged subblocks within such a page, we have to process in + * We can get called with any order up to MAX_ORDER. If our subblock + * size is smaller than that and we have a mixture of plugged and + * unplugged subblocks within such a page, we have to process in * smaller granularity. In that case we'll adjust the order exactly once * within the loop. 
*/ -- cgit v1.2.3 From 4a06f6f3d395d15eb285606f28b74ce5dbc77e52 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 12 Mar 2023 23:40:15 +0000 Subject: drm/ttm: remove comment referencing now-removed vmf_insert_mixed_prot() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function no longer exists; however, the discussion of the prot != vma->vm_page_prot case has been retained and moved to vmf_insert_pfn_prot(), so refer to that instead. Link: https://lkml.kernel.org/r/db403b3622b94a87bd93528cc1d6b44ae88adcdd.1678661628.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Christian König Cc: Dan Williams Cc: Jason Gunthorpe Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Thomas Hellström Cc: Aaron Tomlin Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Heiko Carstens Cc: Huacai Chen Cc: Marcelo Tosatti Cc: Peter Xu Cc: "Russell King (Oracle)" Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index ca7744b852f5..5df3edadb808 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -254,7 +254,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, * encryption bits. This is because the exact location of the * data may not be known at mmap() time and may also change * at arbitrary times while the data is mmap'ed. - * See vmf_insert_mixed_prot() for a discussion. + * See vmf_insert_pfn_prot() for a discussion. */ ret = vmf_insert_pfn_prot(vma, address, pfn, prot); -- cgit v1.2.3 From a70aae12502b130b0c30dda44dff09e575c1aaeb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:43 +0200 Subject: zram: always compile read_from_bdev_sync Patch series "zram I/O path cleanups and fixups", v3. This series cleans up the zram I/O path, and fixes the handling of synchronous I/O to the underlying device in the writeback_store function and on systems with PAGE_SIZE > 4K. The fixes are at the end, as I could not fully reason about them being safe before untangling the callchain. This patch (of 17): read_from_bdev_sync is currently only compiled for non-4k PAGE_SIZE, which means it won't be built with the most common configurations. Replace the ifdef with a PAGE_SIZE check in an if instead.
The check uses an extra symbol and IS_ENABLED to allow the compiler to eliminate the dead code, leading to the same generated code size:

   text    data     bss     dec     hex filename
  16709    1428      12   18149    46e5 drivers/block/zram/zram_drv.o.old
  16709    1428      12   18149    46e5 drivers/block/zram/zram_drv.o.new

Link: https://lkml.kernel.org/r/20230411171459.567614-1-hch@lst.de Link: https://lkml.kernel.org/r/20230411171459.567614-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Cc: Jens Axboe Cc: Minchan Kim Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index aa490da3cef2..57787cbdf1f7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -148,6 +148,7 @@ static inline bool is_partial_io(struct bio_vec *bvec) { return bvec->bv_len != PAGE_SIZE; } +#define ZRAM_PARTIAL_IO 1 #else static inline bool is_partial_io(struct bio_vec *bvec) { @@ -833,7 +834,6 @@ struct zram_work { struct bio_vec bvec; }; -#if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { struct zram_work *zw = container_of(work, struct zram_work, work); @@ -866,23 +866,17 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, return 1; } -#else -static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *bio) -{ - WARN_ON(1); - return -EIO; -} -#endif static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *parent, bool sync) { atomic64_inc(&zram->stats.bd_reads); - if (sync) + if (sync) { + if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) + return -EIO; return read_from_bdev_sync(zram, bvec, entry, parent); - else - return read_from_bdev_async(zram, bvec, entry, parent); + } + return read_from_bdev_async(zram, bvec, entry, parent); } #else static inline void reset_bdev(struct zram *zram) {}; -- cgit v1.2.3 From 9fe95babc7420722d39a1ded379027a1e1825d3a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:44 +0200 Subject: zram: remove valid_io_request All bios handed to drivers from the block layer are already checked against the device size and for logical block alignment (and have been since long before zram was merged), so don't duplicate those checks. Link: https://lkml.kernel.org/r/20230411171459.567614-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 34 +--------------------------------- drivers/block/zram/zram_drv.h | 1 - 2 files changed, 1 insertion(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 57787cbdf1f7..f4466ad1fd4a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -175,30 +175,6 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index) return prio & ZRAM_COMP_PRIORITY_MASK; } -/* - * Check if request is within bounds and aligned on zram logical blocks.
- */ -static inline bool valid_io_request(struct zram *zram, - sector_t start, unsigned int size) -{ - u64 end, bound; - - /* unaligned request */ - if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return false; - if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return false; - - end = start + (size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range */ - if (unlikely(start >= bound || end > bound || start > end)) - return false; - - /* I/O request is valid */ - return true; -} - static void update_position(u32 *index, int *offset, struct bio_vec *bvec) { *index += (*offset + bvec->bv_len) / PAGE_SIZE; @@ -1184,10 +1160,9 @@ static ssize_t io_stat_show(struct device *dev, down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8llu\n", + "%8llu %8llu 0 %8llu\n", (u64)atomic64_read(&zram->stats.failed_reads), (u64)atomic64_read(&zram->stats.failed_writes), - (u64)atomic64_read(&zram->stats.invalid_io), (u64)atomic64_read(&zram->stats.notify_free)); up_read(&zram->init_lock); @@ -2037,13 +2012,6 @@ static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_bdev->bd_disk->private_data; - if (!valid_io_request(zram, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size)) { - atomic64_inc(&zram->stats.invalid_io); - bio_io_error(bio); - return; - } - __zram_make_request(zram, bio); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c5254626f051..ca7a15bd4845 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -78,7 +78,6 @@ struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t failed_reads; /* can happen when memory is too low */ atomic64_t failed_writes; /* can happen when memory is too low */ - atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t same_pages; /* no. of same element filled pages */ atomic64_t huge_pages; /* no. of huge pages */ -- cgit v1.2.3 From 0120dd6e4e202e19a0e011e486fb2da40a5ea279 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:45 +0200 Subject: zram: make zram_bio_discard more self-contained Derive the index and offset variables inside the function, and complete the bio directly in preparation for cleaning up the I/O path. Link: https://lkml.kernel.org/r/20230411171459.567614-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f4466ad1fd4a..e9b31c199027 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1890,15 +1890,12 @@ release_init_lock: } #endif -/* - * zram_bio_discard - handler on discard request - * @index: physical block index in PAGE_SIZE units - * @offset: byte offset within physical block - */ -static void zram_bio_discard(struct zram *zram, u32 index, - int offset, struct bio *bio) +static void zram_bio_discard(struct zram *zram, struct bio *bio) { size_t n = bio->bi_iter.bi_size; + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; /* * zram manages data in physical block size units. 
Because logical block @@ -1926,6 +1923,8 @@ static void zram_bio_discard(struct zram *zram, u32 index, index++; n -= PAGE_SIZE; } + + bio_endio(bio); } /* @@ -1974,8 +1973,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - zram_bio_discard(zram, index, offset, bio); - bio_endio(bio); + zram_bio_discard(zram, bio); return; default: break; } -- cgit v1.2.3 From af8b04c63708fa730c0257084fab91fb2a9cecc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:46 +0200 Subject: zram: simplify bvec iteration in __zram_make_request bio_for_each_segment synthesizes bvecs that never cross page boundaries, so don't duplicate that work in an inner loop. Link: https://lkml.kernel.org/r/20230411171459.567614-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e9b31c199027..e13c7d8e283b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -175,12 +175,6 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index) return prio & ZRAM_COMP_PRIORITY_MASK; } -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) -{ - *index += (*offset + bvec->bv_len) / PAGE_SIZE; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; -} - static inline void update_used_max(struct zram *zram, const unsigned long pages) { @@ -1960,16 +1954,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, static void __zram_make_request(struct zram *zram, struct bio *bio) { - int offset; - u32 index; - struct bio_vec bvec; struct bvec_iter iter; + struct bio_vec bv; unsigned long start_time; - index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; - offset = (bio->bi_iter.bi_sector & - (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: @@ -1980,24 +1968,16 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) } start_time = bio_start_io_acct(bio); - bio_for_each_segment(bvec, bio, iter) { - struct bio_vec bv = bvec; - unsigned int unwritten = bvec.bv_len; - - do { - bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, - unwritten); - if (zram_bvec_rw(zram, &bv, index, offset, - bio_op(bio), bio) < 0) { - bio->bi_status = BLK_STS_IOERR; - break; - } - - bv.bv_offset += bv.bv_len; - unwritten -= bv.bv_len; - - update_position(&index, &offset, &bv); - } while (unwritten); + bio_for_each_segment(bv, bio, iter) { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; + + if (zram_bvec_rw(zram, &bv, index, offset, bio_op(bio), + bio) < 0) { + bio->bi_status = BLK_STS_IOERR; + break; + } } bio_end_io_acct(bio, start_time); bio_endio(bio); -- cgit v1.2.3 From d6eea0097e26769fb58c2773214c3bda85d1678a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:47 +0200 Subject: zram: move discard handling to zram_submit_bio Switch on the bio operation in zram_submit_bio and only call into __zram_make_request for read and write operations.
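Both the new discard helper and the reworked per-segment loop derive the page index and intra-page offset from bi_sector the same way; a worked example of the arithmetic, assuming 4K pages and 512-byte sectors (so SECTORS_PER_PAGE = 8 and SECTORS_PER_PAGE_SHIFT = 3):

	/*
	 * For bi_sector = 21:
	 *   index  = 21 >> 3       = 2     -> third 4K page of the device
	 *   offset = (21 & 7) << 9 = 2560  -> byte offset within that page
	 */
	u32 index  = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;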
Link: https://lkml.kernel.org/r/20230411171459.567614-6-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e13c7d8e283b..00f13eb1c800 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1958,15 +1958,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) struct bio_vec bv; unsigned long start_time; - switch (bio_op(bio)) { - case REQ_OP_DISCARD: - case REQ_OP_WRITE_ZEROES: - zram_bio_discard(zram, bio); - return; - default: - break; - } - start_time = bio_start_io_acct(bio); bio_for_each_segment(bv, bio, iter) { u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; @@ -1990,7 +1981,19 @@ static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_bdev->bd_disk->private_data; - __zram_make_request(zram, bio); + switch (bio_op(bio)) { + case REQ_OP_READ: + case REQ_OP_WRITE: + __zram_make_request(zram, bio); + break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + zram_bio_discard(zram, bio); + break; + default: + WARN_ON_ONCE(1); + bio_endio(bio); + } } static void zram_slot_free_notify(struct block_device *bdev, -- cgit v1.2.3 From 57de7bd830dae90301329748d60e196fab4c4125 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:48 +0200 Subject: zram: return early on error in zram_bvec_rw When the low-level access fails, don't clear the idle flag or clear the caches, and just return. Link: https://lkml.kernel.org/r/20230411171459.567614-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 00f13eb1c800..46dc7a274867 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1933,23 +1933,23 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (!op_is_write(op)) { ret = zram_bvec_read(zram, bvec, index, offset, bio); + if (unlikely(ret < 0)) { + atomic64_inc(&zram->stats.failed_reads); + return ret; + } flush_dcache_page(bvec->bv_page); } else { ret = zram_bvec_write(zram, bvec, index, offset, bio); + if (unlikely(ret < 0)) { + atomic64_inc(&zram->stats.failed_writes); + return ret; + } } zram_slot_lock(zram, index); zram_accessed(zram, index); zram_slot_unlock(zram, index); - - if (unlikely(ret < 0)) { - if (!op_is_write(op)) - atomic64_inc(&zram->stats.failed_reads); - else - atomic64_inc(&zram->stats.failed_writes); - } - - return ret; + return 0; } static void __zram_make_request(struct zram *zram, struct bio *bio) -- cgit v1.2.3 From 82ca875d2549f6843d0d16e897c9e1e0a13c8a74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:49 +0200 Subject: zram: refactor highlevel read and write handling Instead of having an outer loop in __zram_make_request and then branching out for reads vs writes for each loop iteration in zram_bvec_rw, split the main handler into separate zram_bio_read and zram_bio_write handlers that also include the functionality formerly in zram_bvec_rw.
Link: https://lkml.kernel.org/r/20230411171459.567614-8-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 58 ++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 46dc7a274867..2d0154489f03 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1921,38 +1921,34 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio) bio_endio(bio); } -/* - * Returns errno if it has some problem. Otherwise return 0 or 1. - * Returns 0 if IO request was done synchronously - * Returns 1 if IO request was successfully submitted. - */ -static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, enum req_op op, struct bio *bio) +static void zram_bio_read(struct zram *zram, struct bio *bio) { - int ret; + struct bvec_iter iter; + struct bio_vec bv; + unsigned long start_time; - if (!op_is_write(op)) { - ret = zram_bvec_read(zram, bvec, index, offset, bio); - if (unlikely(ret < 0)) { + start_time = bio_start_io_acct(bio); + bio_for_each_segment(bv, bio, iter) { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; + + if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { atomic64_inc(&zram->stats.failed_reads); - return ret; - } - flush_dcache_page(bvec->bv_page); - } else { - ret = zram_bvec_write(zram, bvec, index, offset, bio); - if (unlikely(ret < 0)) { - atomic64_inc(&zram->stats.failed_writes); - return ret; + bio->bi_status = BLK_STS_IOERR; + break; } - } + flush_dcache_page(bv.bv_page); - zram_slot_lock(zram, index); - zram_accessed(zram, index); - zram_slot_unlock(zram, index); - return 0; + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); + } + bio_end_io_acct(bio, start_time); + bio_endio(bio); } -static void __zram_make_request(struct zram *zram, struct bio *bio) +static void zram_bio_write(struct zram *zram, struct bio *bio) { struct bvec_iter iter; struct bio_vec bv; @@ -1964,11 +1960,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - if (zram_bvec_rw(zram, &bv, index, offset, bio_op(bio), - bio) < 0) { + if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { + atomic64_inc(&zram->stats.failed_writes); bio->bi_status = BLK_STS_IOERR; break; } + + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); } bio_end_io_acct(bio, start_time); bio_endio(bio); @@ -1983,8 +1983,10 @@ static void zram_submit_bio(struct bio *bio) switch (bio_op(bio)) { case REQ_OP_READ: + zram_bio_read(zram, bio); + break; case REQ_OP_WRITE: - __zram_make_request(zram, bio); + zram_bio_write(zram, bio); break; case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: -- cgit v1.2.3 From f575a5add8a9a3ca593e58e218f2113e5bd3e50e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:50 +0200 Subject: zram: don't use highmem for the bounce buffer in zram_bvec_{read,write} There is no point in allocating a highmem page when we instantly need to copy from it.
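The kmap dance can be dropped because a lowmem page always has a kernel virtual address; a condensed before/after sketch of the read side, mirroring the diff that follows:

	/* before: a highmem page may lack a kernel mapping, so kmap it */
	page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
	src = kmap_atomic(page);
	memcpy_to_bvec(bvec, src + offset);
	kunmap_atomic(src);

	/* after: a lowmem page is always mapped; page_address() suffices */
	page = alloc_page(GFP_NOIO);
	memcpy_to_bvec(bvec, page_address(page) + offset);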
Link: https://lkml.kernel.org/r/20230411171459.567614-9-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2d0154489f03..0182316b2a90 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1431,7 +1431,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, page = bvec->bv_page; if (is_partial_io(bvec)) { /* Use a temporary buffer to decompress the page */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); + page = alloc_page(GFP_NOIO); if (!page) return -ENOMEM; } @@ -1440,12 +1440,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, if (unlikely(ret)) goto out; - if (is_partial_io(bvec)) { - void *src = kmap_atomic(page); - - memcpy_to_bvec(bvec, src + offset); - kunmap_atomic(src); - } + if (is_partial_io(bvec)) + memcpy_to_bvec(bvec, page_address(page) + offset); out: if (is_partial_io(bvec)) __free_page(page); @@ -1589,12 +1585,11 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, vec = *bvec; if (is_partial_io(bvec)) { - void *dst; /* * This is a partial IO. We need to read the full page * before to write the changes. */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); + page = alloc_page(GFP_NOIO); if (!page) return -ENOMEM; @@ -1602,9 +1597,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (ret) goto out; - dst = kmap_atomic(page); - memcpy_from_bvec(dst + offset, bvec); - kunmap_atomic(dst); + memcpy_from_bvec(page_address(page) + offset, bvec); bvec_set_page(&vec, page, PAGE_SIZE, 0); } -- cgit v1.2.3 From ffb0a9e66562083cc0fb0a93d2de85cecd23a0e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:51 +0200 Subject: zram: rename __zram_bvec_read to zram_read_page __zram_bvec_read doesn't get passed a bvec, but always reads a whole page. Rename it to make the usage clearer.
Link: https://lkml.kernel.org/r/20230411171459.567614-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0182316b2a90..414343b46ade 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1397,8 +1397,8 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, return ret; } -static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - struct bio *bio, bool partial_io) +static int zram_read_page(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io) { int ret; @@ -1436,7 +1436,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, return -ENOMEM; } - ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); + ret = zram_read_page(zram, page, index, bio, is_partial_io(bvec)); if (unlikely(ret)) goto out; @@ -1593,7 +1593,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (!page) return -ENOMEM; - ret = __zram_bvec_read(zram, page, index, bio, true); + ret = zram_read_page(zram, page, index, bio, true); if (ret) goto out; -- cgit v1.2.3 From 79c744eeaa8eaa2d8fbb4f2c1edf292df7163c8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:52 +0200 Subject: zram: directly call zram_read_page in writeback_store writeback_store always reads a full page, so just call zram_read_page directly and bypass the bounce buffer handling. Link: https://lkml.kernel.org/r/20230411171459.567614-11-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 414343b46ade..40ef0a7ce348 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -54,9 +54,8 @@ static size_t huge_class_size; static const struct block_device_operations zram_devops; static void zram_free_page(struct zram *zram, size_t index); -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio); - +static int zram_read_page(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io); static int zram_slot_trylock(struct zram *zram, u32 index) { @@ -672,10 +671,6 @@ static ssize_t writeback_store(struct device *dev, } for (; nr_pages != 0; index++, nr_pages--) { - struct bio_vec bvec; - - bvec_set_page(&bvec, page, PAGE_SIZE, 0); - spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) { spin_unlock(&zram->wb_limit_lock); @@ -719,7 +714,7 @@ static ssize_t writeback_store(struct device *dev, /* Need for hugepage writeback racing */ zram_set_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); - if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { + if (zram_read_page(zram, page, index, NULL, false)) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); zram_clear_flag(zram, index, ZRAM_IDLE); @@ -730,9 +725,8 @@ static ssize_t writeback_store(struct device *dev, bio_init(&bio, zram->bdev, &bio_vec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx *
(PAGE_SIZE >> 9); + bio_add_page(&bio, page, PAGE_SIZE, 0); - bio_add_page(&bio, bvec.bv_page, bvec.bv_len, - bvec.bv_offset); /* * XXX: A single page IO would be inefficient for write * but it would be not bad as starter. -- cgit v1.2.3 From 889ae9169b45f0fc3fc05f93c3c6fa8a851eab67 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:53 +0200 Subject: zram: refactor zram_bdev_read Split the partial read into a separate helper. Link: https://lkml.kernel.org/r/20230411171459.567614-12-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 40ef0a7ce348..2e985531d8cb 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1416,33 +1416,33 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index, return ret; } -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) +/* + * Use a temporary buffer to decompress the page, as the decompressor + * always expects a full page for the output. + */ +static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) { + struct page *page = alloc_page(GFP_NOIO); int ret; - struct page *page; - - page = bvec->bv_page; - if (is_partial_io(bvec)) { - /* Use a temporary buffer to decompress the page */ - page = alloc_page(GFP_NOIO); - if (!page) - return -ENOMEM; - } - - ret = zram_read_page(zram, page, index, bio, is_partial_io(bvec)); - if (unlikely(ret)) - goto out; - if (is_partial_io(bvec)) + if (!page) + return -ENOMEM; + ret = zram_read_page(zram, page, index, bio, true); + if (likely(!ret)) memcpy_to_bvec(bvec, page_address(page) + offset); -out: - if (is_partial_io(bvec)) - __free_page(page); - + __free_page(page); return ret; } +static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ + if (is_partial_io(bvec)) + return zram_bvec_read_partial(zram, bvec, index, offset, bio); + return zram_read_page(zram, bvec->bv_page, index, bio, false); +} + static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct bio *bio) { -- cgit v1.2.3 From 6aa4b839e7a481cc20b243168dc3333fc3d87eb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:54 +0200 Subject: zram: don't pass a bvec to __zram_bvec_write __zram_bvec_write only extracts the page from the bvec and always expects a full page of input. Pass the page directly instead of the bvec and rename the function to zram_write_page.
Link: https://lkml.kernel.org/r/20230411171459.567614-13-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2e985531d8cb..cbcfb5cb2fed 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1443,8 +1443,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, return zram_read_page(zram, bvec->bv_page, index, bio, false); } -static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, struct bio *bio) +static int zram_write_page(struct zram *zram, struct page *page, u32 index) { int ret = 0; unsigned long alloced_pages; @@ -1452,7 +1451,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, unsigned int comp_len = 0; void *src, *dst, *mem; struct zcomp_strm *zstrm; - struct page *page = bvec->bv_page; unsigned long element = 0; enum zram_pageflags flags = 0; @@ -1573,11 +1571,9 @@ out: static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio) { + struct page *page = bvec->bv_page; int ret; - struct page *page = NULL; - struct bio_vec vec; - vec = *bvec; if (is_partial_io(bvec)) { /* * This is a partial IO. We need to read the full page @@ -1592,11 +1588,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, goto out; memcpy_from_bvec(page_address(page) + offset, bvec); - - bvec_set_page(&vec, page, PAGE_SIZE, 0); } - ret = __zram_bvec_write(zram, &vec, index, bio); + ret = zram_write_page(zram, page, index); out: if (is_partial_io(bvec)) __free_page(page); @@ -1711,7 +1705,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, /* * No direct reclaim (slow path) for handle allocation and no - * re-compression attempt (unlike in __zram_bvec_write()) since + * re-compression attempt (unlike in zram_write_bvec()) since * we already have stored that object in zsmalloc. If we cannot * alloc memory for recompressed object then we bail out and * simply keep the old (existing) object in zsmalloc. -- cgit v1.2.3 From a0b81ae7a4ff8a779e9f16152563d614cb91f13c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2023 19:14:55 +0200 Subject: zram: refactor zram_bdev_write Split the read/modify/write case into a separate helper. Link: https://lkml.kernel.org/r/20230411171459.567614-14-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Jens Axboe Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cbcfb5cb2fed..1a7fe75285e6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1568,33 +1568,33 @@ out: return ret; } -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) +/* + * This is a partial IO. Read the full page before writing the changes. 
+ */
+static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
+				   u32 index, int offset, struct bio *bio)
 {
-	struct page *page = bvec->bv_page;
+	struct page *page = alloc_page(GFP_NOIO);
 	int ret;
 
-	if (is_partial_io(bvec)) {
-		/*
-		 * This is a partial IO. We need to read the full page
-		 * before to write the changes.
-		 */
-		page = alloc_page(GFP_NOIO);
-		if (!page)
-			return -ENOMEM;
-
-		ret = zram_read_page(zram, page, index, bio, true);
-		if (ret)
-			goto out;
+	if (!page)
+		return -ENOMEM;
 
+	ret = zram_read_page(zram, page, index, bio, true);
+	if (!ret) {
 		memcpy_from_bvec(page_address(page) + offset, bvec);
+		ret = zram_write_page(zram, page, index);
 	}
+	__free_page(page);
+	return ret;
+}
 
-	ret = zram_write_page(zram, page, index);
-out:
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+			   u32 index, int offset, struct bio *bio)
+{
 	if (is_partial_io(bvec))
-		__free_page(page);
-	return ret;
+		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
+	return zram_write_page(zram, bvec->bv_page, index);
 }
 
 #ifdef CONFIG_ZRAM_MULTI_COMP
-- cgit v1.2.3

From fd45af53e220b2fe13a5e8db88c5e92bc3296754 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 11 Apr 2023 19:14:56 +0200
Subject: zram: pass a page to read_from_bdev

read_from_bdev always reads a whole page, so pass a page to it instead
of the bvec and remove the now pointless zram_bvec_read_from_bdev
wrapper.

Link: https://lkml.kernel.org/r/20230411171459.567614-15-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Sergey Senozhatsky
Acked-by: Minchan Kim
Cc: Jens Axboe
Signed-off-by: Andrew Morton
---
 drivers/block/zram/zram_drv.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1a7fe75285e6..9a35bb0f1a03 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -588,7 +588,7 @@ static void zram_page_end_io(struct bio *bio)
 /*
  * Returns 1 if the submission is successful.
  */
-static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+static int read_from_bdev_async(struct zram *zram, struct page *page,
 			unsigned long entry, struct bio *parent)
 {
 	struct bio *bio;
@@ -599,7 +599,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 		return -ENOMEM;
 
 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
 		bio_put(bio);
 		return -EIO;
 	}
@@ -795,7 +795,7 @@ struct zram_work {
 	struct zram *zram;
 	unsigned long entry;
 	struct bio *bio;
-	struct bio_vec bvec;
+	struct page *page;
 };
 
 static void zram_sync_read(struct work_struct *work)
@@ -805,7 +805,7 @@ static void zram_sync_read(struct work_struct *work)
 	unsigned long entry = zw->entry;
 	struct bio *bio = zw->bio;
 
-	read_from_bdev_async(zram, &zw->bvec, entry, bio);
+	read_from_bdev_async(zram, zw->page, entry, bio);
 }
 
 /*
@@ -813,12 +813,12 @@ static void zram_sync_read(struct work_struct *work)
  * chained IO with parent IO in same context, it's a deadlock. To avoid that,
  * use a worker thread context.
  */
-static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+static int read_from_bdev_sync(struct zram *zram, struct page *page,
 		unsigned long entry, struct bio *bio)
 {
 	struct zram_work work;
 
-	work.bvec = *bvec;
+	work.page = page;
 	work.zram = zram;
 	work.entry = entry;
 	work.bio = bio;
@@ -831,20 +831,20 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 	return 1;
 }
 
-static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+static int read_from_bdev(struct zram *zram, struct page *page,
 		unsigned long entry, struct bio *parent, bool sync)
 {
 	atomic64_inc(&zram->stats.bd_reads);
 	if (sync) {
 		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
 			return -EIO;
-		return read_from_bdev_sync(zram, bvec, entry, parent);
+		return read_from_bdev_sync(zram, page, entry, parent);
 	}
-	return read_from_bdev_async(zram, bvec, entry, parent);
+	return read_from_bdev_async(zram, page, entry, parent);
 }
 #else
 static inline void reset_bdev(struct zram *zram) {};
-static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+static int read_from_bdev(struct zram *zram, struct page *page,
 		unsigned long entry, struct bio *parent, bool sync)
 {
 	return -EIO;
@@ -1328,20 +1328,6 @@ out:
 		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }
 
-/*
- * Reads a page from the writeback devices. Corresponding ZRAM slot
- * should be unlocked.
- */
-static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
-				    u32 index, struct bio *bio, bool partial_io)
-{
-	struct bio_vec bvec;
-
-	bvec_set_page(&bvec, page, PAGE_SIZE, 0);
-	return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
-			      partial_io);
-}
-
 /*
  * Reads (decompresses if needed) a page from zspool (zsmalloc).
  * Corresponding ZRAM slot should be locked.
@@ -1402,11 +1388,14 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
 		ret = zram_read_from_zspool(zram, page, index);
 		zram_slot_unlock(zram, index);
 	} else {
-		/* Slot should be unlocked before the function call */
+		/*
+		 * The slot should be unlocked before reading from the backing
+		 * device.
+		 */
 		zram_slot_unlock(zram, index);
 
-		ret = zram_bvec_read_from_bdev(zram, page, index, bio,
-					       partial_io);
+		ret = read_from_bdev(zram, page, zram_get_element(zram, index),
+				     bio, partial_io);
 	}
 
 	/* Should NEVER happen. Return bio error if it does. */
-- cgit v1.2.3

From 0cd97a0372f21a66d1591114d0a12391e8d977d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 11 Apr 2023 19:14:57 +0200
Subject: zram: don't return errors from read_from_bdev_async

bio_alloc will never return a NULL bio when it is allowed to sleep, and
adding a single page to a bio with a single vector also can't fail, so
switch to the asserting __bio_add_page variant and drop the error
returns.

Link: https://lkml.kernel.org/r/20230411171459.567614-16-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Sergey Senozhatsky
Acked-by: Minchan Kim
Cc: Jens Axboe
Signed-off-by: Andrew Morton
---
 drivers/block/zram/zram_drv.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9a35bb0f1a03..24b31957d9e8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -585,24 +585,16 @@ static void zram_page_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
-/*
- * Returns 1 if the submission is successful.
- */
-static int read_from_bdev_async(struct zram *zram, struct page *page,
+static void read_from_bdev_async(struct zram *zram, struct page *page,
 			unsigned long entry, struct bio *parent)
 {
 	struct bio *bio;
 
 	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
 			GFP_NOIO);
-	if (!bio)
-		return -ENOMEM;
 
 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
-		bio_put(bio);
-		return -EIO;
-	}
+	__bio_add_page(bio, page, PAGE_SIZE, 0);
 
 	if (!parent)
 		bio->bi_end_io = zram_page_end_io;
@@ -610,7 +602,6 @@ static int read_from_bdev_async(struct zram *zram, struct page *page,
 		bio_chain(bio, parent);
 
 	submit_bio(bio);
-	return 1;
 }
 
 #define PAGE_WB_SIG "page_index="
@@ -840,7 +831,8 @@ static int read_from_bdev(struct zram *zram, struct page *page,
 			return -EIO;
 		return read_from_bdev_sync(zram, page, entry, parent);
 	}
-	return read_from_bdev_async(zram, page, entry, parent);
+	read_from_bdev_async(zram, page, entry, parent);
+	return 1;
 }
 #else
-- cgit v1.2.3

From 4e3c87b9421df497d849ae61aab9762de7f66afb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 11 Apr 2023 19:14:58 +0200
Subject: zram: fix synchronous reads

Currently nothing waits for the synchronous reads before accessing the
data.  Switch them to an on-stack bio and submit_bio_wait to make sure
the I/O has actually completed when the work item has been flushed.
This also removes the call to page_endio that would unlock a page that
has never been locked.

Drop the partial_io/sync flag, as chaining only makes sense for the
asynchronous reads of the entire page.

Link: https://lkml.kernel.org/r/20230411171459.567614-17-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Sergey Senozhatsky
Acked-by: Minchan Kim
Cc: Jens Axboe
Signed-off-by: Andrew Morton
---
 drivers/block/zram/zram_drv.c | 60 ++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 38 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 24b31957d9e8..a2b98100cbda 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -55,7 +55,7 @@ static const struct block_device_operations zram_devops;
 
 static void zram_free_page(struct zram *zram, size_t index);
 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
-			  struct bio *bio, bool partial_io);
+			  struct bio *parent);
 
 static int zram_slot_trylock(struct zram *zram, u32 index)
 {
@@ -576,31 +576,15 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 	atomic64_dec(&zram->stats.bd_count);
 }
 
-static void zram_page_end_io(struct bio *bio)
-{
-	struct page *page = bio_first_page_all(bio);
-
-	page_endio(page, op_is_write(bio_op(bio)),
-			blk_status_to_errno(bio->bi_status));
-	bio_put(bio);
-}
-
 static void read_from_bdev_async(struct zram *zram, struct page *page,
 			unsigned long entry, struct bio *parent)
 {
 	struct bio *bio;
 
-	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
-			GFP_NOIO);
-
+	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 	__bio_add_page(bio, page, PAGE_SIZE, 0);
-
-	if (!parent)
-		bio->bi_end_io = zram_page_end_io;
-	else
-		bio_chain(bio, parent);
-
+	bio_chain(bio, parent);
 	submit_bio(bio);
 }
 
@@ -705,7 +689,7 @@ static ssize_t writeback_store(struct device *dev,
 		/* Need for hugepage writeback racing */
 		zram_set_flag(zram, index, ZRAM_IDLE);
 		zram_slot_unlock(zram, index);
-		if (zram_read_page(zram, page, index, NULL, false)) {
+		if (zram_read_page(zram, page, index, NULL)) {
 			zram_slot_lock(zram, index);
 			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 			zram_clear_flag(zram, index, ZRAM_IDLE);
@@ -785,18 +769,19 @@ struct zram_work {
 	struct work_struct work;
 	struct zram *zram;
 	unsigned long entry;
-	struct bio *bio;
 	struct page *page;
 };
 
 static void zram_sync_read(struct work_struct *work)
 {
 	struct zram_work *zw = container_of(work, struct zram_work, work);
-	struct zram *zram = zw->zram;
-	unsigned long entry = zw->entry;
-	struct bio *bio = zw->bio;
+	struct bio_vec bv;
+	struct bio bio;
 
-	read_from_bdev_async(zram, zw->page, entry, bio);
+	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
+	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
+	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
+	submit_bio_wait(&bio);
 }
 
 /*
@@ -805,14 +790,13 @@ static void zram_sync_read(struct work_struct *work)
  * use a worker thread context.
  */
 static int read_from_bdev_sync(struct zram *zram, struct page *page,
-		unsigned long entry, struct bio *bio)
+		unsigned long entry)
 {
 	struct zram_work work;
 
 	work.page = page;
 	work.zram = zram;
 	work.entry = entry;
-	work.bio = bio;
 
 	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 	queue_work(system_unbound_wq, &work.work);
@@ -823,13 +807,13 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page,
 }
 
 static int read_from_bdev(struct zram *zram, struct page *page,
-		unsigned long entry, struct bio *parent, bool sync)
+		unsigned long entry, struct bio *parent)
 {
 	atomic64_inc(&zram->stats.bd_reads);
-	if (sync) {
+	if (!parent) {
 		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
 			return -EIO;
-		return read_from_bdev_sync(zram, page, entry, parent);
+		return read_from_bdev_sync(zram, page, entry);
 	}
 	read_from_bdev_async(zram, page, entry, parent);
 	return 1;
@@ -837,7 +821,7 @@ static int read_from_bdev(struct zram *zram, struct page *page,
 #else
 static inline void reset_bdev(struct zram *zram) {};
 static int read_from_bdev(struct zram *zram, struct page *page,
-		unsigned long entry, struct bio *parent, bool sync)
+		unsigned long entry, struct bio *parent)
 {
 	return -EIO;
@@ -1370,7 +1354,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
 }
 
 static int zram_read_page(struct zram *zram, struct page *page, u32 index,
-			  struct bio *bio, bool partial_io)
+			  struct bio *parent)
 {
 	int ret;
 
@@ -1387,7 +1371,7 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
 		zram_slot_unlock(zram, index);
 
 		ret = read_from_bdev(zram, page, zram_get_element(zram, index),
-				     bio, partial_io);
+				     parent);
 	}
 
 	/* Should NEVER happen. Return bio error if it does. */
@@ -1402,14 +1386,14 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
  * always expects a full page for the output.
  */
 static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
-				  u32 index, int offset, struct bio *bio)
+				  u32 index, int offset)
 {
 	struct page *page = alloc_page(GFP_NOIO);
 	int ret;
 
 	if (!page)
 		return -ENOMEM;
-	ret = zram_read_page(zram, page, index, bio, true);
+	ret = zram_read_page(zram, page, index, NULL);
 	if (likely(!ret))
 		memcpy_to_bvec(bvec, page_address(page) + offset);
 	__free_page(page);
@@ -1420,8 +1404,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 			  u32 index, int offset, struct bio *bio)
 {
 	if (is_partial_io(bvec))
-		return zram_bvec_read_partial(zram, bvec, index, offset, bio);
-	return zram_read_page(zram, bvec->bv_page, index, bio, false);
+		return zram_bvec_read_partial(zram, bvec, index, offset);
+	return zram_read_page(zram, bvec->bv_page, index, bio);
 }
 
 static int zram_write_page(struct zram *zram, struct page *page, u32 index)
@@ -1561,7 +1545,7 @@ static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
 
 	if (!page)
 		return -ENOMEM;
-	ret = zram_read_page(zram, page, index, bio, true);
+	ret = zram_read_page(zram, page, index, bio);
 	if (!ret) {
 		memcpy_from_bvec(page_address(page) + offset, bvec);
 		ret = zram_write_page(zram, page, index);
-- cgit v1.2.3

From 1e9460d132cc728941621f0f7a7b03a7d1c469af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 11 Apr 2023 19:14:59 +0200
Subject: zram: return errors from read_from_bdev_sync

Propagate read errors to the caller instead of dropping them on the
floor, and stop returning the somewhat dangerous 1 on success from
read_from_bdev*.

Link: https://lkml.kernel.org/r/20230411171459.567614-18-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Sergey Senozhatsky
Acked-by: Minchan Kim
Cc: Jens Axboe
Signed-off-by: Andrew Morton
---
 drivers/block/zram/zram_drv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a2b98100cbda..38a5d3a2836b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -770,6 +770,7 @@ struct zram_work {
 	struct zram *zram;
 	unsigned long entry;
 	struct page *page;
+	int error;
 };
 
 static void zram_sync_read(struct work_struct *work)
@@ -781,7 +782,7 @@ static void zram_sync_read(struct work_struct *work)
 	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
 	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
 	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
-	submit_bio_wait(&bio);
+	zw->error = submit_bio_wait(&bio);
 }
 
 /*
@@ -803,7 +804,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page,
 	flush_work(&work.work);
 	destroy_work_on_stack(&work.work);
 
-	return 1;
+	return work.error;
 }
 
 static int read_from_bdev(struct zram *zram, struct page *page,
@@ -816,7 +817,7 @@ static int read_from_bdev(struct zram *zram, struct page *page,
 		return read_from_bdev_sync(zram, page, entry);
 	}
 	read_from_bdev_async(zram, page, entry, parent);
-	return 1;
+	return 0;
}
 #else
-- cgit v1.2.3
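
[Editor's note: taken together, the last three patches leave the
backing-device read path looking roughly like the sketch below. It is
assembled from the diffs above and is not a standalone translation unit;
struct zram_work, read_from_bdev_sync(), read_from_bdev_async() and
ZRAM_PARTIAL_IO are defined elsewhere in zram_drv.c:

	static void zram_sync_read(struct work_struct *work)
	{
		struct zram_work *zw = container_of(work, struct zram_work, work);
		struct bio_vec bv;
		struct bio bio;

		/* on-stack bio; submit_bio_wait() blocks until the read completes */
		bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
		bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
		__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
		zw->error = submit_bio_wait(&bio);
	}

	static int read_from_bdev(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
	{
		atomic64_inc(&zram->stats.bd_reads);
		if (!parent) {
			/* no parent bio: partial I/O that must finish synchronously */
			if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
				return -EIO;
			return read_from_bdev_sync(zram, page, entry);
		}
		/* full-page read: chain to the parent bio and return immediately */
		read_from_bdev_async(zram, page, entry, parent);
		return 0;
	}
]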