author	Petr Tesarik <petr.tesarik.ext@huawei.com>	2023-08-03 13:59:41 +0200
committer	Christoph Hellwig <hch@lst.de>	2023-08-08 10:29:21 -0700
commit	d069ed288ac74c24e2b1c294aa9445c80ed6c518 (patch)
tree	ab5552e5c52f44a0997c635cca9d30ea01cb88d4 /kernel/dma
parent	f94cb36e760d2a4d359ad64f5fafc62ca755fd72 (diff)
swiotlb: optimize get_max_slots()
Use a simple logical shift and increment to calculate the number of
slots taken by the DMA segment boundary. At least GCC-13 is not able
to optimize the expression, producing this horrible assembly code on
x86:

	cmpq	$-1, %rcx
	je	.L364
	addq	$2048, %rcx
	shrq	$11, %rcx
	movq	%rcx, %r13
.L331:
	// rest of the function here...

	// after function epilogue and return:
.L364:
	movabsq	$9007199254740992, %r13
	jmp	.L331

After the optimization, the code looks more reasonable:

	shrq	$11, %r11
	leaq	1(%r11), %rbx

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
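[Editor's note] Why the branch can go away: for any mask m below ~0UL,
(m >> IO_TLB_SHIFT) + 1 equals DIV_ROUND_UP(m + 1, IO_TLB_SIZE), and for
m == ~0UL the shift-and-increment yields 1UL << (BITS_PER_LONG - IO_TLB_SHIFT)
directly, so the old overflow special case becomes redundant. Below is a
minimal userspace sketch (not kernel code) that checks this equivalence; it
assumes the x86-64 values IO_TLB_SHIFT = 11 and BITS_PER_LONG = 64, uses
DIV_ROUND_UP as a stand-in for the kernel's nr_slots() helper, and the
_old/_new function names are illustrative only:

	#include <assert.h>
	#include <stdio.h>

	#define IO_TLB_SHIFT	11	/* 2 KiB slots, as in the kernel */
	#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)
	#define BITS_PER_LONG	64	/* assumption: LP64 / x86-64 */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	/* Old version: a branch guards against boundary_mask + 1
	 * overflowing to 0 when the mask is all-ones. */
	static unsigned long get_max_slots_old(unsigned long boundary_mask)
	{
		if (boundary_mask == ~0UL)
			return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
		return DIV_ROUND_UP(boundary_mask + 1, IO_TLB_SIZE);
	}

	/* New version: shift first, then increment; no overflow, no branch. */
	static unsigned long get_max_slots_new(unsigned long boundary_mask)
	{
		return (boundary_mask >> IO_TLB_SHIFT) + 1;
	}

	int main(void)
	{
		/* DMA segment boundary masks have the form 2^k - 1. */
		for (int k = IO_TLB_SHIFT; k < BITS_PER_LONG; k++) {
			unsigned long mask = (1UL << k) - 1;
			assert(get_max_slots_old(mask) == get_max_slots_new(mask));
		}
		/* The all-ones mask the old code special-cased. */
		assert(get_max_slots_old(~0UL) == get_max_slots_new(~0UL));
		printf("old and new get_max_slots() agree\n");
		return 0;
	}

The underlying identity is floor(m / 2^s) + 1 == ceil((m + 1) / 2^s) for any
unsigned m, which is what lets the compiler emit the two-instruction
shrq/leaq sequence shown above instead of a compare-and-branch.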
Diffstat (limited to 'kernel/dma')
-rw-r--r--	kernel/dma/swiotlb.c	4
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 50a0e9c45c39..394494a6b1f3 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -903,9 +903,7 @@ static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
*/
static inline unsigned long get_max_slots(unsigned long boundary_mask)
{
- if (boundary_mask == ~0UL)
- return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
- return nr_slots(boundary_mask + 1);
+ return (boundary_mask >> IO_TLB_SHIFT) + 1;
}

static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index)