summaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
authorBarry Song <v-songbaohua@oppo.com>2024-03-02 08:27:45 +1300
committerHerbert Xu <herbert@gondor.apana.org.au>2024-03-08 19:23:25 +0800
commit77292bb8ca69c808741aadbd29207605296e24af (patch)
treeb9e1fc42286a991a1840486a7c0a2789728628a0 /crypto
parent43a7885ec0dfca2bdc60f2de736e55cf5e7b915d (diff)
downloadlinux-77292bb8ca69c808741aadbd29207605296e24af.tar.gz
linux-77292bb8ca69c808741aadbd29207605296e24af.tar.bz2
linux-77292bb8ca69c808741aadbd29207605296e24af.zip
crypto: scomp - remove memcpy if sg_nents is 1 and pages are lowmem
While sg_nents is 1, which is always true for the current kernel because the only user - zswap - is this case, we have a chance to remove the memcpy and thus improve performance. Though sg_nents is 1, its buffer might cross two pages. If those pages are highmem, we have no cheap way to map them to a contiguous virtual address, because kmap doesn't support more than one page (kmap of a single highmem page could still be expensive for the TLB) and vmap is expensive. So we also test and ensure the page is not highmem in order to safely use page_to_virt before removing the memcpy. The good news is that in the vast majority of cases we are lowmem, and we are always lowmem on modern and popular hardware. Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Nhat Pham <nphamcs@gmail.com> Cc: Yosry Ahmed <yosryahmed@google.com> Signed-off-by: Barry Song <v-songbaohua@oppo.com> Tested-by: Chengming Zhou <zhouchengming@bytedance.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/scompress.c 36
1 files changed, 29 insertions, 7 deletions
diff --git a/crypto/scompress.c b/crypto/scompress.c
index 9cda4ef84a9b..93daf3eb9842 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -117,6 +117,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
struct crypto_scomp *scomp = *tfm_ctx;
void **ctx = acomp_request_ctx(req);
struct scomp_scratch *scratch;
+ void *src, *dst;
unsigned int dlen;
int ret;
@@ -134,13 +135,25 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
scratch = raw_cpu_ptr(&scomp_scratch);
spin_lock(&scratch->lock);
- scatterwalk_map_and_copy(scratch->src, req->src, 0, req->slen, 0);
+ if (sg_nents(req->src) == 1 && !PageHighMem(sg_page(req->src))) {
+ src = page_to_virt(sg_page(req->src)) + req->src->offset;
+ } else {
+ scatterwalk_map_and_copy(scratch->src, req->src, 0,
+ req->slen, 0);
+ src = scratch->src;
+ }
+
+ if (req->dst && sg_nents(req->dst) == 1 && !PageHighMem(sg_page(req->dst)))
+ dst = page_to_virt(sg_page(req->dst)) + req->dst->offset;
+ else
+ dst = scratch->dst;
+
if (dir)
- ret = crypto_scomp_compress(scomp, scratch->src, req->slen,
- scratch->dst, &req->dlen, *ctx);
+ ret = crypto_scomp_compress(scomp, src, req->slen,
+ dst, &req->dlen, *ctx);
else
- ret = crypto_scomp_decompress(scomp, scratch->src, req->slen,
- scratch->dst, &req->dlen, *ctx);
+ ret = crypto_scomp_decompress(scomp, src, req->slen,
+ dst, &req->dlen, *ctx);
if (!ret) {
if (!req->dst) {
req->dst = sgl_alloc(req->dlen, GFP_ATOMIC, NULL);
@@ -152,8 +165,17 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
ret = -ENOSPC;
goto out;
}
- scatterwalk_map_and_copy(scratch->dst, req->dst, 0, req->dlen,
- 1);
+ if (dst == scratch->dst) {
+ scatterwalk_map_and_copy(scratch->dst, req->dst, 0,
+ req->dlen, 1);
+ } else {
+ int nr_pages = DIV_ROUND_UP(req->dst->offset + req->dlen, PAGE_SIZE);
+ int i;
+ struct page *dst_page = sg_page(req->dst);
+
+ for (i = 0; i < nr_pages; i++)
+ flush_dcache_page(dst_page + i);
+ }
}
out:
spin_unlock(&scratch->lock);