diff options
author | Eric Biggers <ebiggers@google.com> | 2025-02-16 14:55:27 -0800 |
---|---|---|
committer | Eric Biggers <ebiggers@google.com> | 2025-03-10 09:29:08 -0700 |
commit | bbe2610bc5ada51418a4191e799cfb4577302a31 (patch) | |
tree | 1038e4c4be5ac959a6677c22737b368aa3769c75 /scripts/gen-crc-consts.py | |
parent | a0bd462f3a1369f4440cc4448b879ed2c8611f1a (diff) | |
download | linux-stable-bbe2610bc5ada51418a4191e799cfb4577302a31.tar.gz linux-stable-bbe2610bc5ada51418a4191e799cfb4577302a31.tar.bz2 linux-stable-bbe2610bc5ada51418a4191e799cfb4577302a31.zip |
riscv/crc: add "template" for Zbc optimized CRC functions
Add a "template" crc-clmul-template.h that can generate RISC-V Zbc
optimized CRC functions. Each generated CRC function is parameterized
by CRC length and bit order, and it accepts a pointer to the constants
struct required for the specific CRC polynomial desired. Update
gen-crc-consts.py to support generating the needed constants structs.
This makes it possible to easily wire up a Zbc optimized implementation
of almost any CRC.
The design generally follows what I did for x86, but it is simplified by
using RISC-V's scalar carryless multiplication Zbc, which has no
equivalent on x86. RISC-V's clmulr instruction is also helpful. A
potential switch to Zvbc (or support for Zvbc alongside Zbc) is left for
future work. For long messages Zvbc should be fastest, but it would
need to be shown to be worthwhile over just using Zbc which is
significantly more convenient to use, especially in the kernel context.
Compared to the existing Zbc-optimized CRC32 code and the earlier
proposed Zbc-optimized CRC-T10DIF code
(https://lore.kernel.org/r/20250211071101.181652-1-zhihang.shao.iscas@gmail.com),
this submission deduplicates the code among CRC variants and is
significantly more optimized. It uses "folding" to take better
advantage of instruction-level parallelism (to a more limited extent
than x86 for now, but it could be extended to more), it reworks the
Barrett reduction to eliminate unnecessary instructions, and it
documents all the math used and makes all the constants reproducible.
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Acked-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20250216225530.306980-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Diffstat (limited to 'scripts/gen-crc-consts.py')
-rwxr-xr-x | scripts/gen-crc-consts.py | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/scripts/gen-crc-consts.py b/scripts/gen-crc-consts.py index aa678a50897d..f9b44fc3a03f 100755 --- a/scripts/gen-crc-consts.py +++ b/scripts/gen-crc-consts.py @@ -105,6 +105,57 @@ def gen_slicebyN_tables(variants, n): print(f'\t{s}') print('};') +def print_riscv_const(v, bits_per_long, name, val, desc): + print(f'\t.{name} = {fmt_poly(v, val, bits_per_long)}, /* {desc} */') + +def do_gen_riscv_clmul_consts(v, bits_per_long): + (G, n, lsb) = (v.G, v.bits, v.lsb) + + pow_of_x = 3 * bits_per_long - (1 if lsb else 0) + print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_hi', + reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G') + pow_of_x = 2 * bits_per_long - (1 if lsb else 0) + print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_lo', + reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G') + + pow_of_x = bits_per_long - 1 + n + print_riscv_const(v, bits_per_long, 'barrett_reduction_const_1', + div(1 << pow_of_x, G), f'floor(x^{pow_of_x} / G)') + + val = G - (1 << n) + desc = f'G - x^{n}' + if lsb: + val <<= bits_per_long - n + desc = f'({desc}) * x^{bits_per_long - n}' + print_riscv_const(v, bits_per_long, 'barrett_reduction_const_2', val, desc) + +def gen_riscv_clmul_consts(variants): + print('') + print('struct crc_clmul_consts {'); + print('\tunsigned long fold_across_2_longs_const_hi;'); + print('\tunsigned long fold_across_2_longs_const_lo;'); + print('\tunsigned long barrett_reduction_const_1;'); + print('\tunsigned long barrett_reduction_const_2;'); + print('};'); + for v in variants: + print(''); + if v.bits > 32: + print_header(v, 'Constants') + print('#ifdef CONFIG_64BIT') + print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{') + do_gen_riscv_clmul_consts(v, 64) + print('};') + print('#endif') + else: + print_header(v, 'Constants') + print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{') + print('#ifdef CONFIG_64BIT') + do_gen_riscv_clmul_consts(v, 64) + print('#else') + do_gen_riscv_clmul_consts(v, 32) + print('#endif') + print('};') + # Generate constants for carryless multiplication based CRC computation. def gen_x86_pclmul_consts(variants): # These are the distances, in bits, to generate folding constants for. @@ -213,7 +264,7 @@ def parse_crc_variants(vars_string): if len(sys.argv) != 3: sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... CRC_VARIANT[,CRC_VARIANT]...\n') - sys.stderr.write(' CONSTS_TYPE can be sliceby[1-8] or x86_pclmul\n') + sys.stderr.write(' CONSTS_TYPE can be sliceby[1-8], riscv_clmul, or x86_pclmul\n') sys.stderr.write(' CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n') sys.stderr.write(' E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n') sys.stderr.write(' Polynomial must use the given bit_order and exclude x^{num_bits}\n') @@ -232,6 +283,8 @@ variants = parse_crc_variants(sys.argv[2]) for consts_type in consts_types: if consts_type.startswith('sliceby'): gen_slicebyN_tables(variants, int(consts_type.removeprefix('sliceby'))) + elif consts_type == 'riscv_clmul': + gen_riscv_clmul_consts(variants) elif consts_type == 'x86_pclmul': gen_x86_pclmul_consts(variants) else: |