author    Eric Biggers <ebiggers@google.com>  2025-02-16 14:55:27 -0800
committer Eric Biggers <ebiggers@google.com>  2025-03-10 09:29:08 -0700
commit    bbe2610bc5ada51418a4191e799cfb4577302a31 (patch)
tree      1038e4c4be5ac959a6677c22737b368aa3769c75 /scripts/gen-crc-consts.py
parent    a0bd462f3a1369f4440cc4448b879ed2c8611f1a (diff)
riscv/crc: add "template" for Zbc optimized CRC functions
Add a "template" crc-clmul-template.h that can generate RISC-V Zbc optimized CRC functions. Each generated CRC function is parameterized by CRC length and bit order, and it accepts a pointer to the constants struct required for the specific CRC polynomial desired. Update gen-crc-consts.py to support generating the needed constants structs. This makes it possible to easily wire up a Zbc optimized implementation of almost any CRC.

The design generally follows what I did for x86, but it is simplified by using RISC-V's scalar carryless multiplication extension (Zbc), which has no equivalent on x86. RISC-V's clmulr instruction is also helpful. A potential switch to Zvbc (or support for Zvbc alongside Zbc) is left for future work. For long messages Zvbc should be fastest, but it would need to be shown to be worthwhile over just using Zbc, which is significantly more convenient to use, especially in the kernel context.

Compared to the existing Zbc-optimized CRC32 code and the earlier proposed Zbc-optimized CRC-T10DIF code (https://lore.kernel.org/r/20250211071101.181652-1-zhihang.shao.iscas@gmail.com), this submission deduplicates the code among CRC variants and is significantly more optimized. It uses "folding" to take better advantage of instruction-level parallelism (to a more limited extent than x86 for now, but it could be extended to more), it reworks the Barrett reduction to eliminate unnecessary instructions, and it documents all the math used and makes all the constants reproducible.

Tested-by: Björn Töpel <bjorn@rivosinc.com>
Acked-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20250216225530.306980-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
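The hunk below calls div() and reduce() GF(2) polynomial helpers (and fmt_poly() for output formatting) that are defined earlier in gen-crc-consts.py and so do not appear in this diff. For readers following the math, here is a minimal illustrative sketch of what such helpers look like; the names match the calls in the hunk, but the bodies are a reconstruction, not the script's exact code:

    # Illustrative sketch (not part of this diff): GF(2) polynomial helpers
    # matching the div()/reduce() calls in the hunk below. Polynomials are
    # Python ints, one coefficient per bit; subtraction over GF(2) is XOR.

    def div(a, b):
        # floor(a / b): long division of polynomial a by polynomial b.
        q = 0
        while a.bit_length() >= b.bit_length():
            shift = a.bit_length() - b.bit_length()
            q ^= 1 << shift   # record the quotient term x^shift
            a ^= b << shift   # subtract (XOR) b * x^shift from a
        return q

    def reduce(a, b):
        # a mod b: the remainder of the same long division.
        while a.bit_length() >= b.bit_length():
            a ^= b << (a.bit_length() - b.bit_length())
        return a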
Diffstat (limited to 'scripts/gen-crc-consts.py')
-rwxr-xr-x  scripts/gen-crc-consts.py | 55
1 file changed, 54 insertions(+), 1 deletion(-)
diff --git a/scripts/gen-crc-consts.py b/scripts/gen-crc-consts.py
index aa678a50897d..f9b44fc3a03f 100755
--- a/scripts/gen-crc-consts.py
+++ b/scripts/gen-crc-consts.py
@@ -105,6 +105,57 @@ def gen_slicebyN_tables(variants, n):
print(f'\t{s}')
print('};')
+def print_riscv_const(v, bits_per_long, name, val, desc):
+ print(f'\t.{name} = {fmt_poly(v, val, bits_per_long)}, /* {desc} */')
+
+def do_gen_riscv_clmul_consts(v, bits_per_long):
+ (G, n, lsb) = (v.G, v.bits, v.lsb)
+
+ pow_of_x = 3 * bits_per_long - (1 if lsb else 0)
+ print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_hi',
+ reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G')
+ pow_of_x = 2 * bits_per_long - (1 if lsb else 0)
+ print_riscv_const(v, bits_per_long, 'fold_across_2_longs_const_lo',
+ reduce(1 << pow_of_x, G), f'x^{pow_of_x} mod G')
+
+ pow_of_x = bits_per_long - 1 + n
+ print_riscv_const(v, bits_per_long, 'barrett_reduction_const_1',
+ div(1 << pow_of_x, G), f'floor(x^{pow_of_x} / G)')
+
+ val = G - (1 << n)
+ desc = f'G - x^{n}'
+ if lsb:
+ val <<= bits_per_long - n
+ desc = f'({desc}) * x^{bits_per_long - n}'
+ print_riscv_const(v, bits_per_long, 'barrett_reduction_const_2', val, desc)
+
+def gen_riscv_clmul_consts(variants):
+ print('')
+ print('struct crc_clmul_consts {')
+ print('\tunsigned long fold_across_2_longs_const_hi;')
+ print('\tunsigned long fold_across_2_longs_const_lo;')
+ print('\tunsigned long barrett_reduction_const_1;')
+ print('\tunsigned long barrett_reduction_const_2;')
+ print('};')
+ for v in variants:
+ print('')
+ if v.bits > 32:
+ print_header(v, 'Constants')
+ print('#ifdef CONFIG_64BIT')
+ print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{')
+ do_gen_riscv_clmul_consts(v, 64)
+ print('};')
+ print('#endif')
+ else:
+ print_header(v, 'Constants')
+ print(f'static const struct crc_clmul_consts {v.name}_consts __maybe_unused = {{')
+ print('#ifdef CONFIG_64BIT')
+ do_gen_riscv_clmul_consts(v, 64)
+ print('#else')
+ do_gen_riscv_clmul_consts(v, 32)
+ print('#endif')
+ print('};')
+
# Generate constants for carryless multiplication based CRC computation.
def gen_x86_pclmul_consts(variants):
# These are the distances, in bits, to generate folding constants for.
@@ -213,7 +264,7 @@ def parse_crc_variants(vars_string):
if len(sys.argv) != 3:
sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... CRC_VARIANT[,CRC_VARIANT]...\n')
- sys.stderr.write(' CONSTS_TYPE can be sliceby[1-8] or x86_pclmul\n')
+ sys.stderr.write(' CONSTS_TYPE can be sliceby[1-8], riscv_clmul, or x86_pclmul\n')
sys.stderr.write(' CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n')
sys.stderr.write(' E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n')
sys.stderr.write(' Polynomial must use the given bit_order and exclude x^{num_bits}\n')
@@ -232,6 +283,8 @@ variants = parse_crc_variants(sys.argv[2])
for consts_type in consts_types:
if consts_type.startswith('sliceby'):
gen_slicebyN_tables(variants, int(consts_type.removeprefix('sliceby')))
+ elif consts_type == 'riscv_clmul':
+ gen_riscv_clmul_consts(variants)
elif consts_type == 'x86_pclmul':
gen_x86_pclmul_consts(variants)
else:
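With this change applied, the RISC-V constants are generated by passing the new consts type, e.g. ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320. As a plausibility check of the first Barrett constant computed in do_gen_riscv_clmul_consts() above, floor(x^(bits_per_long - 1 + n) / G) can be recomputed by hand with the div() sketch shown earlier. The snippet below is such a hand check, not part of the script; it assumes G is the full generator polynomial including the x^n term, as the 'G - x^n' description in the hunk implies:

    # Hand check of barrett_reduction_const_1 for crc16_msb_0x8bb7 on a
    # 64-bit target, using div() from the sketch above. Assumes G includes
    # the x^16 term: x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1.
    G = (1 << 16) | 0x8bb7
    bits_per_long, n = 64, 16
    print(hex(div(1 << (bits_per_long - 1 + n), G)))  # expect the generated barrett_reduction_const_1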