Diffstat (limited to 'lib')
253 files changed, 10376 insertions, 7297 deletions
diff --git a/lib/.gitignore b/lib/.gitignore index 54596b634ecb..101a4aa92fb5 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -5,5 +5,3 @@ /gen_crc32table /gen_crc64table /oid_registry_data.c -/test_fortify.log -/test_fortify/*.log diff --git a/lib/842/842.h b/lib/842/842.h index 7b1f581a2907..f9e8a5dd790f 100644 --- a/lib/842/842.h +++ b/lib/842/842.h @@ -78,7 +78,7 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/crc32.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/sw842.h> diff --git a/lib/Kconfig b/lib/Kconfig index d33a268bc256..dccb61b7d698 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -40,6 +40,18 @@ config PACKING When in doubt, say N. +config PACKING_KUNIT_TEST + tristate "KUnit tests for packing library" if !KUNIT_ALL_TESTS + depends on PACKING && KUNIT + default KUNIT_ALL_TESTS + help + This builds KUnit tests for the packing library. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in Documentation/dev-tools/kunit/. + + When in doubt, say N. + config BITREVERSE tristate @@ -144,13 +156,18 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" - select CRYPTO - select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the SCSI data integrity subsystem. +config ARCH_HAS_CRC_T10DIF + bool + +config CRC_T10DIF_ARCH + tristate + default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS + config CRC64_ROCKSOFT tristate "CRC calculation for the Rocksoft model CRC64" select CRC64 @@ -178,61 +195,12 @@ config CRC32 the kernel tree does. Such modules that use library CRC32/CRC32c functions require M here. -config CRC32_SELFTEST - tristate "CRC32 perform self test on init" - depends on CRC32 - help - This option enables the CRC32 library functions to perform a - self test on initialization. The self test computes crc32_le - and crc32_be over byte strings with random alignment and length - and computes the total elapsed time and number of bytes processed. - -choice - prompt "CRC32 implementation" - depends on CRC32 - default CRC32_SLICEBY8 - help - This option allows a kernel builder to override the default choice - of CRC32 algorithm. Choose the default ("slice by 8") unless you - know that you need one of the others. - -config CRC32_SLICEBY8 - bool "Slice by 8 bytes" - help - Calculate checksum 8 bytes at a time with a clever slicing algorithm. - This is the fastest algorithm, but comes with a 8KiB lookup table. - Most modern processors have enough cache to hold this table without - thrashing the cache. - - This is the default implementation choice. Choose this one unless - you have a good reason not to. - -config CRC32_SLICEBY4 - bool "Slice by 4 bytes" - help - Calculate checksum 4 bytes at a time with a clever slicing algorithm. - This is a bit slower than slice by 8, but has a smaller 4KiB lookup - table. - - Only choose this option if you know what you are doing. - -config CRC32_SARWATE - bool "Sarwate's Algorithm (one byte at a time)" - help - Calculate checksum a byte at a time using Sarwate's algorithm. This - is not particularly fast, but has a small 256 byte lookup table. - - Only choose this option if you know what you are doing. - -config CRC32_BIT - bool "Classic Algorithm (one bit at a time)" - help - Calculate checksum one bit at a time. This is VERY slow, but has - no lookup table. This is provided as a debugging option. 
- - Only choose this option if you are debugging crc32. +config ARCH_HAS_CRC32 + bool -endchoice +config CRC32_ARCH + tristate + default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS config CRC64 tristate "CRC64 functions" @@ -260,14 +228,10 @@ config CRC7 config LIBCRC32C tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" - select CRYPTO - select CRYPTO_CRC32C + select CRC32 help - This option is provided for the case where no in-kernel-tree - modules require CRC32c functions, but a module built outside the - kernel tree does. Such modules that use library CRC32c functions - require M here. See Castagnoli93. - Module will be libcrc32c. + This option just selects CRC32 and is provided for compatibility + purposes until the users are updated to select CRC32 directly. config CRC8 tristate "CRC8 function" @@ -276,6 +240,17 @@ config CRC8 when they need to do cyclic redundancy check according CRC8 algorithm. Module will be called crc8. +config CRC_OPTIMIZATIONS + bool "Enable optimized CRC implementations" if EXPERT + default y + help + Disabling this option reduces code size slightly by disabling the + architecture-optimized implementations of any CRC variants that are + enabled. CRC checksumming performance may get much slower. + + Keep this enabled unless you're really trying to minimize the size of + the kernel. + config XXHASH tristate @@ -539,13 +514,7 @@ config CPUMASK_OFFSTACK stack overflow. config FORCE_NR_CPUS - bool "Set number of CPUs at compile time" - depends on SMP && EXPERT && !COMPILE_TEST - help - Say Yes if you have NR_CPUS set to an actual number of possible - CPUs in your system, not to a default value. This forces the core - code to rely on compile-time value and optimize kernel routines - better. + def_bool !SMP config CPU_RMAP bool @@ -629,6 +598,7 @@ config SIGNATURE config DIMLIB tristate + depends on NET help Dynamic Interrupt Moderation library. Implements an algorithm for dynamically changing CQ moderation values @@ -782,3 +752,9 @@ config POLYNOMIAL config FIRMWARE_TABLE bool + +config UNION_FIND + bool + +config MIN_HEAP + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bba448c819b..1af972a92d06 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -97,7 +97,7 @@ config BOOT_PRINTK_DELAY using "boot_delay=N". It is likely that you would also need to use "lpj=M" to preset - the "loops per jiffie" value. + the "loops per jiffy" value. See a previous boot log for the "lpj" value to use for your system, and then set "lpj=M" before setting "boot_delay=N". NOTE: Using this option may adversely affect SMP systems. @@ -379,13 +379,15 @@ config DEBUG_INFO_BTF depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL - depends on !DEBUG_INFO_DWARF5 || PAHOLE_VERSION >= 121 + depends on PAHOLE_VERSION >= 116 + depends on DEBUG_INFO_DWARF4 || PAHOLE_VERSION >= 121 # pahole uses elfutils, which does not have support for Hexagon relocations depends on !HEXAGON help Generate deduplicated BTF type information from DWARF debug info. - Turning this on expects presence of pahole tool, which will convert - DWARF type info into equivalent deduplicated BTF type info. + Turning this on requires pahole v1.16 or later (v1.21 or later to + support DWARF 5), which will convert DWARF type info into equivalent + deduplicated BTF type info. config PAHOLE_HAS_SPLIT_BTF def_bool PAHOLE_VERSION >= 119 @@ -431,7 +433,7 @@ config GDB_SCRIPTS build directory. 
If you load vmlinux into gdb, the helper scripts will be automatically imported by gdb as well, and additional functions are available to analyze a Linux kernel - instance. See Documentation/dev-tools/gdb-kernel-debugging.rst + instance. See Documentation/process/debugging/gdb-kernel-debugging.rst for further details. endif # DEBUG_INFO @@ -571,6 +573,21 @@ config VMLINUX_MAP pieces of code get eliminated with CONFIG_LD_DEAD_CODE_DATA_ELIMINATION. +config BUILTIN_MODULE_RANGES + bool "Generate address range information for builtin modules" + depends on !LTO + depends on VMLINUX_MAP + help + When modules are built into the kernel, there will be no module name + associated with its symbols in /proc/kallsyms. Tracers may want to + identify symbols by module name and symbol name regardless of whether + the module is configured as loadable or not. + + This option generates modules.builtin.ranges in the build tree with + offset ranges (per ELF section) for the module(s) they belong to. + It also records an anchor symbol to determine the load address of the + section. + config DEBUG_FORCE_WEAK_PER_CPU bool "Force weak per-cpu definitions" depends on DEBUG_KERNEL @@ -969,6 +986,38 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". +config CODE_TAGGING + bool + select KALLSYMS + +config MEM_ALLOC_PROFILING + bool "Enable memory allocation profiling" + default n + depends on MMU + depends on PROC_FS + depends on !DEBUG_FORCE_WEAK_PER_CPU + select CODE_TAGGING + select PAGE_EXTENSION + select SLAB_OBJ_EXT + help + Track allocation source code and record total allocation size + initiated at that code location. The mechanism can be used to track + memory leaks with a low performance and memory impact. + +config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + bool "Enable memory allocation profiling by default" + default y + depends on MEM_ALLOC_PROFILING + +config MEM_ALLOC_PROFILING_DEBUG + bool "Memory allocation profiler debugging" + default n + depends on MEM_ALLOC_PROFILING + select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT + help + Adds warnings with helpful error messages for memory allocation + profiling. + source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" source "lib/Kconfig.kmsan" @@ -1012,7 +1061,9 @@ config PANIC_TIMEOUT Set the timeout value (in seconds) until a reboot occurs when the kernel panics. If n = 0, then we wait forever. A timeout value n > 0 will wait n seconds before rebooting, while a timeout - value n < 0 will reboot immediately. + value n < 0 will reboot immediately. This setting can be overridden + with the kernel command line option panic=, and from userspace via + /proc/sys/kernel/panic. config LOCKUP_DETECTOR bool @@ -1278,19 +1329,6 @@ config SCHEDSTATS endmenu -config DEBUG_TIMEKEEPING - bool "Enable extra timekeeping sanity checking" - help - This option will enable additional timekeeping sanity checks - which may be helpful when diagnosing issues where timekeeping - problems are suspected. - - This may include checks in the timekeeping hotpaths, so this - option may have a (very small) performance impact to some - workloads. - - If unsure, say N. - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT @@ -1359,22 +1397,14 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.rst. 
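As a hedged illustration of what the PROVE_RAW_LOCK_NESTING entry below checks (this sketch is not part of the diff; the lock names are hypothetical): on PREEMPT_RT a spinlock_t becomes a sleeping lock, so taking one while a raw_spinlock_t is held violates the nesting rule and triggers an "invalid wait context" lockdep report.

	/* Illustrative only; names are hypothetical, not from this diff. */
	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_raw_lock);
	static DEFINE_SPINLOCK(example_lock);

	static void bad_nesting_example(void)
	{
		raw_spin_lock(&example_raw_lock);
		spin_lock(&example_lock);	/* invalid on PREEMPT_RT: sleeping lock under raw lock */
		spin_unlock(&example_lock);
		raw_spin_unlock(&example_raw_lock);
	}

	static void good_nesting_example(void)
	{
		spin_lock(&example_lock);
		raw_spin_lock(&example_raw_lock);	/* raw inside non-raw is allowed */
		raw_spin_unlock(&example_raw_lock);
		spin_unlock(&example_lock);
	}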
config PROVE_RAW_LOCK_NESTING - bool "Enable raw_spinlock - spinlock nesting checks" + bool "Enable raw_spinlock - spinlock nesting checks" if !ARCH_SUPPORTS_RT depends on PROVE_LOCKING - default n + default y if ARCH_SUPPORTS_RT help Enable the raw_spinlock vs. spinlock nesting checks which ensure that the lock nesting rules for PREEMPT_RT enabled kernels are not violated. - NOTE: There are known nesting problems. So if you enable this - option expect lockdep splats until these problems have been fully - addressed which is work in progress. This config switch allows to - identify and analyze these problems. It will be removed and the - check permanently enabled once the main issues have been fixed. - - If unsure, select N. - config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT @@ -1472,41 +1502,41 @@ config LOCKDEP_SMALL bool config LOCKDEP_BITS - int "Bitsize for MAX_LOCKDEP_ENTRIES" + int "Size for MAX_LOCKDEP_ENTRIES (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL - range 10 30 + range 10 24 default 15 help Try increasing this value if you hit "BUG: MAX_LOCKDEP_ENTRIES too low!" message. config LOCKDEP_CHAINS_BITS - int "Bitsize for MAX_LOCKDEP_CHAINS" + int "Size for MAX_LOCKDEP_CHAINS (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL - range 10 30 + range 10 21 default 16 help Try increasing this value if you hit "BUG: MAX_LOCKDEP_CHAINS too low!" message. config LOCKDEP_STACK_TRACE_BITS - int "Bitsize for MAX_STACK_TRACE_ENTRIES" + int "Size for MAX_STACK_TRACE_ENTRIES (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL - range 10 30 + range 10 26 default 19 help Try increasing this value if you hit "BUG: MAX_STACK_TRACE_ENTRIES too low!" message. config LOCKDEP_STACK_TRACE_HASH_BITS - int "Bitsize for STACK_TRACE_HASH_SIZE" + int "Size for STACK_TRACE_HASH_SIZE (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL - range 10 30 + range 10 26 default 14 help Try increasing this value if you need large STACK_TRACE_HASH_SIZE. config LOCKDEP_CIRCULAR_QUEUE_BITS - int "Bitsize for elements in circular_queue struct" + int "Size for elements in circular_queue struct (as Nth power of 2)" depends on LOCKDEP - range 10 30 + range 10 26 default 12 help Try increasing this value if you hit "lockdep bfs error:-1" warning due to __cq_enqueue() failure. @@ -1581,6 +1611,7 @@ config SCF_TORTURE_TEST config CSD_LOCK_WAIT_DEBUG bool "Debugging for csd_lock_wait(), called from smp_call_function*()" depends on DEBUG_KERNEL + depends on SMP depends on 64BIT default n help @@ -1854,7 +1885,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED - default y if PPC || X86 || ARM64 + default y if PPC || X86 || ARM64 || S390 help If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental @@ -2064,6 +2095,16 @@ config FAIL_SUNRPC Provide fault-injection capability for SunRPC and its consumers. +config FAIL_SKB_REALLOC + bool "Fault-injection capability forcing skb to reallocate" + depends on FAULT_INJECTION_DEBUG_FS + help + Provide fault-injection capability that forces the skb to be + reallocated, catching possible invalid pointers to the skb. 
+ + For more information, check + Documentation/dev-tools/fault-injection/fault-injection.rst + config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" depends on FAULT_INJECTION @@ -2140,6 +2181,14 @@ config KCOV_IRQ_AREA_SIZE soft interrupts. This specifies the size of those areas in the number of unsigned long words. +config KCOV_SELFTEST + bool "Perform short selftests on boot" + depends on KCOV + help + Run short KCOV coverage collection selftests on boot. + On test failure, causes the kernel to panic. Recommended to be + enabled, ensuring critical functionality works as intended. + menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" default y @@ -2247,6 +2296,16 @@ config TEST_DIV64 If unsure, say N. +config TEST_MULDIV64 + tristate "mul_u64_u64_div_u64() test" + depends on DEBUG_KERNEL || m + help + Enable this to turn on 'mul_u64_u64_div_u64()' function test. + This test is executed only once during system boot (so affects + only boot time), or at module load time. + + If unsure, say N. + config TEST_IOV_ITER tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2419,6 +2478,17 @@ config TEST_RHASHTABLE config TEST_IDA tristate "Perform selftest on IDA functions" +config TEST_MISC_MINOR + tristate "Basic misc minor Kunit test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Kunit test for the misc minor. + It tests misc minor functions for dynamic and misc dynamic minor. + This include misc_xxx functions + + If unsure, say N. + config TEST_PARMAN tristate "Perform selftest on priority array manager" depends on PARMAN @@ -2451,7 +2521,6 @@ config TEST_LKM config TEST_BITOPS tristate "Test module for compilation of bitops operations" - depends on m help This builds the "test_bitops" module that is much like the TEST_LKM module except that it does a basic exercise of the @@ -2475,18 +2544,6 @@ config TEST_VMALLOC If unsure, say N. -config TEST_USER_COPY - tristate "Test user/kernel boundary protections" - depends on m - help - This builds the "test_user_copy" module that runs sanity checks - on the copy_to/from_user infrastructure, making sure basic - user/kernel boundary testing is working. If it fails to load, - a regression has been detected in the user/kernel memory boundary - protections. - - If unsure, say N. - config TEST_BPF tristate "Test BPF filter functionality" depends on m && NET @@ -2573,6 +2630,23 @@ config CHECKSUM_KUNIT If unsure, say N. +config UTIL_MACROS_KUNIT + tristate "KUnit test util_macros.h functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the util_macros.h function at boot. + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config HASH_KUNIT_TEST tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2596,6 +2670,7 @@ config RESOURCE_KUNIT_TEST tristate "KUnit test for resource API" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS + select GET_FREE_REGION help This builds the resource API unit test. Tests the logic of API provided by resource.c and ioport.h. 
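Several entries in this area convert standalone self-test modules into KUnit suites (PACKING_KUNIT_TEST, UTIL_MACROS_KUNIT, RESOURCE_KUNIT_TEST, and others). A minimal sketch of such a suite is shown below; the suite and case names are hypothetical and only illustrate the KUnit boilerplate these options build, not any test added by this diff.

	/* Minimal KUnit suite sketch; names are hypothetical. */
	#include <kunit/test.h>
	#include <linux/log2.h>

	static void example_ilog2_test(struct kunit *test)
	{
		KUNIT_EXPECT_EQ(test, ilog2(1), 0);
		KUNIT_EXPECT_EQ(test, ilog2(64), 6);
	}

	static struct kunit_case example_test_cases[] = {
		KUNIT_CASE(example_ilog2_test),
		{}
	};

	static struct kunit_suite example_test_suite = {
		.name = "example-lib",
		.test_cases = example_test_cases,
	};
	kunit_test_suite(example_test_suite);

	MODULE_LICENSE("GPL");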
@@ -2773,6 +2848,26 @@ config HW_BREAKPOINT_KUNIT_TEST If unsure, say N. +config CRC_KUNIT_TEST + tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + select CRC16 + select CRC_T10DIF + select CRC32 + select CRC64 + help + Unit tests for the CRC library functions. + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. + +config CRC_BENCHMARK + bool "Benchmark for the CRC functions" + depends on CRC_KUNIT_TEST + help + Include benchmarks in the KUnit test suite for the CRC functions. + config SIPHASH_KUNIT_TEST tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2784,6 +2879,15 @@ config SIPHASH_KUNIT_TEST This is intended to help people writing architecture-specific optimized versions. If unsure, say N. +config USERCOPY_KUNIT_TEST + tristate "KUnit Test for user/kernel boundary protections" + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the "usercopy_kunit" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. + config TEST_UDELAY tristate "udelay test driver" help @@ -2837,6 +2941,141 @@ config TEST_KMOD If unsure, say N. +config TEST_RUNTIME + bool + +config TEST_RUNTIME_MODULE + bool + +config TEST_KALLSYMS + tristate "module kallsyms find_symbol() test" + depends on m + select TEST_RUNTIME + select TEST_RUNTIME_MODULE + select TEST_KALLSYMS_A + select TEST_KALLSYMS_B + select TEST_KALLSYMS_C + select TEST_KALLSYMS_D + help + This allows us to stress test find_symbol() through the kallsyms + used to place symbols on the kernel ELF kallsyms and modules kallsyms + where we place kernel symbols such as exported symbols. + + We have four test modules: + + A: has KALLSYSMS_NUMSYMS exported symbols + B: uses one of A's symbols + C: adds KALLSYMS_SCALE_FACTOR * KALLSYSMS_NUMSYMS exported + D: adds 2 * the symbols than C + + We stress test find_symbol() through two means: + + 1) Upon load of B it will trigger simplify_symbols() to look for the + one symbol it uses from the module A with tons of symbols. This is an + indirect way for us to have B call resolve_symbol_wait() upon module + load. This will eventually call find_symbol() which will eventually + try to find the symbols used with find_exported_symbol_in_section(). + find_exported_symbol_in_section() uses bsearch() so a binary search + for each symbol. Binary search will at worst be O(log(n)) so the + larger TEST_MODULE_KALLSYSMS the worse the search. + + 2) The selftests should load C first, before B. Upon B's load towards + the end right before we call module B's init routine we get + complete_formation() called on the module. That will first check + for duplicate symbols with the call to verify_exported_symbols(). + That is when we'll force iteration on module C's insane symbol list. + Since it has 10 * KALLSYMS_NUMSYMS it means we can first test + just loading B without C. The amount of time it takes to load C Vs + B can give us an idea of the impact growth of the symbol space and + give us projection. Module A only uses one symbol from B so to allow + this scaling in module C to be proportional, if it used more symbols + then the first test would be doing more and increasing just the + search space would be slightly different. The last module, module D + will just increase the search space by twice the number of symbols in + C so to allow for full projects. 
+ + tools/testing/selftests/module/find_symbol.sh + + The current defaults will incur a build delay of about 7 minutes + on an x86_64 with only 8 cores. Enable this only if you want to + stress test find_symbol() with thousands of symbols. At the same + time this is also useful to test building modules with thousands of + symbols, and if BTF is enabled this also stress tests adding BTF + information for each module. Currently enabling many more symbols + will segfault the build system. + + If unsure, say N. + +if TEST_KALLSYMS + +config TEST_KALLSYMS_A + tristate + depends on m + +config TEST_KALLSYMS_B + tristate + depends on m + +config TEST_KALLSYMS_C + tristate + depends on m + +config TEST_KALLSYMS_D + tristate + depends on m + +choice + prompt "Kallsym test range" + default TEST_KALLSYMS_LARGE + help + Selecting something other than "Fast" will enable tests which slow + down the build and may crash your build. + +config TEST_KALLSYMS_FAST + bool "Fast builds" + help + You won't really be testing kallsysms, so this just helps fast builds + when allmodconfig is used.. + +config TEST_KALLSYMS_LARGE + bool "Enable testing kallsyms with large exports" + help + This will enable larger number of symbols. This will slow down + your build considerably. + +config TEST_KALLSYMS_MAX + bool "Known kallsysms limits" + help + This will enable exports to the point we know we'll start crashing + builds. + +endchoice + +config TEST_KALLSYMS_NUMSYMS + int "test kallsyms number of symbols" + range 2 10000 + default 2 if TEST_KALLSYMS_FAST + default 100 if TEST_KALLSYMS_LARGE + default 10000 if TEST_KALLSYMS_MAX + help + The number of symbols to create on TEST_KALLSYMS_A, only one of which + module TEST_KALLSYMS_B will use. This also will be used + for how many symbols TEST_KALLSYMS_C will have, scaled up by + TEST_KALLSYMS_SCALE_FACTOR. Note that setting this to 10,000 will + trigger a segfault today, don't use anything close to it unless + you are aware that this should not be used for automated build tests. + +config TEST_KALLSYMS_SCALE_FACTOR + int "test kallsyms scale factor" + default 8 + help + How many more unusued symbols will TEST_KALLSYSMS_C have than + TEST_KALLSYMS_A. If 8, then module C will have 8 * syms + than module A. Then TEST_KALLSYMS_D will have double the amount + of symbols than C so to allow projections. + +endif # TEST_KALLSYMS + config TEST_DEBUG_VIRTUAL tristate "Test CONFIG_DEBUG_VIRTUAL feature" depends on DEBUG_VIRTUAL @@ -2895,7 +3134,7 @@ config TEST_FREE_PAGES config TEST_FPU tristate "Test floating point operations in kernel space" - depends on X86 && !KCOV_INSTRUMENT_ALL + depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL help Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu which will trigger a sequence of floating point operations. This is used @@ -2927,6 +3166,37 @@ config TEST_OBJPOOL If unsure, say N. +config INT_POW_TEST + tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the int_pow function, + which performs integer exponentiation. The test suite is designed to + verify that the implementation of int_pow correctly computes the power + of a given base raised to a given exponent. + + Enabling this option will include tests that check various scenarios + and edge cases to ensure the accuracy and reliability of the exponentiation + function. 
+ + If unsure, say N + +config INT_SQRT_KUNIT_TEST + tristate "Integer square root test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the int_sqrt() function, + which performs square root calculation. The test suite checks + various scenarios, including edge cases, to ensure correctness. + + Enabling this option will include tests that check various scenarios + and edge cases to ensure the accuracy and reliability of the square root + function. + + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST @@ -2994,7 +3264,7 @@ config RUST_BUILD_ASSERT_ALLOW bool "Allow unoptimized build-time assertions" depends on RUST help - Controls how are `build_error!` and `build_assert!` handled during build. + Controls how `build_error!` and `build_assert!` are handled during the build. If calls to them exist in the binary, it may indicate a violated invariant or that the optimizer failed to verify the invariant during compilation. diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 98016e137b7f..f82889a830fa 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -195,13 +195,6 @@ config KASAN_KUNIT_TEST For more information on KUnit and unit tests in general, please refer to the KUnit documentation in Documentation/dev-tools/kunit/. -config KASAN_MODULE_TEST - tristate "KUnit-incompatible tests of KASAN bug detection capabilities" - depends on m && KASAN && !KASAN_HW_TAGS - help - A part of the KASAN test suite that is not integrated with KUnit. - Incompatible with Hardware Tag-Based KASAN. - config KASAN_EXTRA_INFO bool "Record and report more information" depends on KASAN diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index b5c0e6576749..8336b1a489a3 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -19,7 +19,7 @@ menuconfig KGDB CONFIG_FRAME_POINTER to aid in producing more reliable stack backtraces in the external debugger. Documentation of kernel debugger is available at http://kgdb.sourceforge.net - as well as in Documentation/dev-tools/kgdb.rst. If + as well as in Documentation/process/debugging/kgdb.rst. If unsure, say N. if KGDB @@ -122,6 +122,7 @@ config KDB_DEFAULT_ENABLE config KDB_KEYBOARD bool "KGDB_KDB: keyboard as input device" depends on VT && KGDB_KDB && !PARISC + depends on HAS_IOPORT default n help KDB can use a PS/2 type keyboard for an input device diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index e81e1ac4a919..1d4aa7a83b3a 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -4,6 +4,7 @@ config ARCH_HAS_UBSAN menuconfig UBSAN bool "Undefined behaviour sanity checker" + depends on ARCH_HAS_UBSAN help This option enables the Undefined Behaviour sanity checker. Compile-time instrumentation is used to detect various undefined @@ -28,8 +29,8 @@ config UBSAN_TRAP Also note that selecting Y will cause your kernel to Oops with an "illegal instruction" error with no further details - when a UBSAN violation occurs. (Except on arm64, which will - report which Sanitizer failed.) This may make it hard to + when a UBSAN violation occurs. (Except on arm64 and x86, which + will report which Sanitizer failed.) This may make it hard to determine whether an Oops was caused by UBSAN or to figure out the details of a UBSAN violation. It makes the kernel log output less useful for bug reports. 
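The INT_POW_TEST and INT_SQRT_KUNIT_TEST entries above exercise the lib/math helpers. As a rough sketch of the behaviour being verified (not the kernel's implementation), int_pow()-style integer exponentiation is commonly done by squaring:

	/* Sketch of exponentiation by squaring; illustrative only. */
	#include <linux/types.h>

	static u64 example_int_pow(u64 base, unsigned int exp)
	{
		u64 result = 1;

		while (exp) {
			if (exp & 1)
				result *= base;
			exp >>= 1;
			base *= base;
		}
		return result;
	}

For example, example_int_pow(3, 4) walks the exponent bits 100b and yields 81, which is the kind of case such a test suite would assert.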
diff --git a/lib/Makefile b/lib/Makefile index ed8dbf4436dd..d5cfc7afbbb8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -14,6 +14,7 @@ KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n KCOV_INSTRUMENT_fault-inject.o := n +KCOV_INSTRUMENT_find_bit.o := n # string.o implements standard library functions like memset/memcpy etc. # Use -ffreestanding to ensure that the compiler does not try to "optimize" @@ -34,10 +35,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o + buildid.o objpool.o iomem_copy.o +lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_MIN_HEAP) += min_heap.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -78,7 +81,6 @@ obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o -obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o @@ -96,6 +98,7 @@ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o @@ -110,30 +113,10 @@ CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o -# -# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns -# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS -# get appended last to CFLAGS and thus override those previous compiler options. -# -FPU_CFLAGS := -msse -msse2 -ifdef CONFIG_CC_IS_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 -# -# The "-msse" in the first argument is there so that the -# -mpreferred-stack-boundary=3 build error: -# -# -mpreferred-stack-boundary=3 is not between 4 and 12 -# -# can be triggered. Otherwise gcc doesn't complain. -FPU_CFLAGS += -mhard-float -FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) -endif - obj-$(CONFIG_TEST_FPU) += test_fpu.o -CFLAGS_test_fpu.o += $(FPU_CFLAGS) +test_fpu-y := test_fpu_glue.o test_fpu_impl.o +CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU) # Some KUnit files (hooks.o) need to be built-in even when KUnit is a module, # so we can't just use obj-$(CONFIG_KUNIT). 
@@ -174,16 +157,15 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o obj-$(CONFIG_PACKING) += packing.o +obj-$(CONFIG_PACKING_KUNIT_TEST) += packing_test.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC64) += crc64.o -obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o obj-$(CONFIG_CRC4) += crc4.o obj-$(CONFIG_CRC7) += crc7.o -obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o obj-$(CONFIG_XXHASH) += xxhash.o @@ -233,6 +215,9 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +obj-$(CONFIG_CODE_TAGGING) += codetag.o +obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o + lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o @@ -353,7 +338,7 @@ $(obj)/oid_registry_data.c: $(srctree)/include/linux/oid_registry.h \ $(call cmd,build_OID_registry) quiet_cmd_build_OID_registry = GEN $@ - cmd_build_OID_registry = perl $(srctree)/$(src)/build_OID_registry $< $@ + cmd_build_OID_registry = perl $(src)/build_OID_registry $< $@ clean-files += oid_registry_data.c @@ -387,6 +372,7 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o +obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o @@ -404,42 +390,12 @@ CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-overread) CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-truncation) CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o +obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o +obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o obj-$(CONFIG_FIRMWARE_TABLE) += fw_table.o -# FORTIFY_SOURCE compile-time behavior tests -TEST_FORTIFY_SRCS = $(wildcard $(srctree)/$(src)/test_fortify/*-*.c) -TEST_FORTIFY_LOGS = $(patsubst $(srctree)/$(src)/%.c, %.log, $(TEST_FORTIFY_SRCS)) -TEST_FORTIFY_LOG = test_fortify.log - -quiet_cmd_test_fortify = TEST $@ - cmd_test_fortify = $(CONFIG_SHELL) $(srctree)/scripts/test_fortify.sh \ - $< $@ "$(NM)" $(CC) $(c_flags) \ - $(call cc-disable-warning,fortify-source) \ - -DKBUILD_EXTRA_WARN1 - -targets += $(TEST_FORTIFY_LOGS) -clean-files += $(TEST_FORTIFY_LOGS) -clean-files += $(addsuffix .o, $(TEST_FORTIFY_LOGS)) -$(obj)/test_fortify/%.log: $(src)/test_fortify/%.c \ - $(src)/test_fortify/test_fortify.h \ - $(srctree)/include/linux/fortify-string.h \ - $(srctree)/scripts/test_fortify.sh \ - FORCE - $(call if_changed,test_fortify) - -quiet_cmd_gen_fortify_log = GEN $@ - cmd_gen_fortify_log = cat </dev/null $(filter-out FORCE,$^) 2>/dev/null > $@ || true - -targets += $(TEST_FORTIFY_LOG) -clean-files += $(TEST_FORTIFY_LOG) -$(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE - $(call if_changed,gen_fortify_log) - -# Fake dependency to trigger the fortify tests. 
-ifeq ($(CONFIG_FORTIFY_SOURCE),y) -$(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG) -endif +subdir-$(CONFIG_FORTIFY_SOURCE) += test_fortify diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c new file mode 100644 index 000000000000..19b45617bdcf --- /dev/null +++ b/lib/alloc_tag.c @@ -0,0 +1,771 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/alloc_tag.h> +#include <linux/execmem.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/page_ext.h> +#include <linux/proc_fs.h> +#include <linux/seq_buf.h> +#include <linux/seq_file.h> +#include <linux/vmalloc.h> + +#define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) +#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME) +#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME) + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support = true; +#else +static bool mem_profiling_support; +#endif + +static struct codetag_type *alloc_tag_cttype; + +DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); +EXPORT_SYMBOL(_shared_alloc_tag); + +DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, + mem_alloc_profiling_key); +EXPORT_SYMBOL(mem_alloc_profiling_key); + +DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed); + +struct alloc_tag_kernel_section kernel_tags = { NULL, 0 }; +unsigned long alloc_tag_ref_mask; +int alloc_tag_ref_offs; + +struct allocinfo_private { + struct codetag_iterator iter; + bool print_header; +}; + +static void *allocinfo_start(struct seq_file *m, loff_t *pos) +{ + struct allocinfo_private *priv; + struct codetag *ct; + loff_t node = *pos; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + m->private = priv; + if (!priv) + return NULL; + + priv->print_header = (node == 0); + codetag_lock_module_list(alloc_tag_cttype, true); + priv->iter = codetag_get_ct_iter(alloc_tag_cttype); + while ((ct = codetag_next_ct(&priv->iter)) != NULL && node) + node--; + + return ct ? priv : NULL; +} + +static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) +{ + struct allocinfo_private *priv = (struct allocinfo_private *)arg; + struct codetag *ct = codetag_next_ct(&priv->iter); + + (*pos)++; + if (!ct) + return NULL; + + return priv; +} + +static void allocinfo_stop(struct seq_file *m, void *arg) +{ + struct allocinfo_private *priv = (struct allocinfo_private *)m->private; + + if (priv) { + codetag_lock_module_list(alloc_tag_cttype, false); + kfree(priv); + } +} + +static void print_allocinfo_header(struct seq_buf *buf) +{ + /* Output format version, so we can change it. 
*/ + seq_buf_printf(buf, "allocinfo - version: 1.0\n"); + seq_buf_printf(buf, "# <size> <calls> <tag info>\n"); +} + +static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct) +{ + struct alloc_tag *tag = ct_to_alloc_tag(ct); + struct alloc_tag_counters counter = alloc_tag_read(tag); + s64 bytes = counter.bytes; + + seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls); + codetag_to_text(out, ct); + seq_buf_putc(out, ' '); + seq_buf_putc(out, '\n'); +} + +static int allocinfo_show(struct seq_file *m, void *arg) +{ + struct allocinfo_private *priv = (struct allocinfo_private *)arg; + char *bufp; + size_t n = seq_get_buf(m, &bufp); + struct seq_buf buf; + + seq_buf_init(&buf, bufp, n); + if (priv->print_header) { + print_allocinfo_header(&buf); + priv->print_header = false; + } + alloc_tag_to_text(&buf, priv->iter.ct); + seq_commit(m, seq_buf_used(&buf)); + return 0; +} + +static const struct seq_operations allocinfo_seq_op = { + .start = allocinfo_start, + .next = allocinfo_next, + .stop = allocinfo_stop, + .show = allocinfo_show, +}; + +size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep) +{ + struct codetag_iterator iter; + struct codetag *ct; + struct codetag_bytes n; + unsigned int i, nr = 0; + + if (can_sleep) + codetag_lock_module_list(alloc_tag_cttype, true); + else if (!codetag_trylock_module_list(alloc_tag_cttype)) + return 0; + + iter = codetag_get_ct_iter(alloc_tag_cttype); + while ((ct = codetag_next_ct(&iter))) { + struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct)); + + n.ct = ct; + n.bytes = counter.bytes; + + for (i = 0; i < nr; i++) + if (n.bytes > tags[i].bytes) + break; + + if (i < count) { + nr -= nr == count; + memmove(&tags[i + 1], + &tags[i], + sizeof(tags[0]) * (nr - i)); + nr++; + tags[i] = n; + } + } + + codetag_lock_module_list(alloc_tag_cttype, false); + + return nr; +} + +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) +{ + int i; + struct alloc_tag *tag; + unsigned int nr_pages = 1 << new_order; + + if (!mem_alloc_profiling_enabled()) + return; + + tag = pgalloc_tag_get(&folio->page); + if (!tag) + return; + + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(&ref, tag); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); + } + } +} + +void pgalloc_tag_swap(struct folio *new, struct folio *old) +{ + union pgtag_ref_handle handle_old, handle_new; + union codetag_ref ref_old, ref_new; + struct alloc_tag *tag_old, *tag_new; + + if (!mem_alloc_profiling_enabled()) + return; + + tag_old = pgalloc_tag_get(&old->page); + if (!tag_old) + return; + tag_new = pgalloc_tag_get(&new->page); + if (!tag_new) + return; + + if (!get_page_tag_ref(&old->page, &ref_old, &handle_old)) + return; + if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) { + put_page_tag_ref(handle_old); + return; + } + + /* + * Clear tag references to avoid debug warning when using + * __alloc_tag_ref_set() with non-empty reference. 
+ */ + set_codetag_empty(&ref_old); + set_codetag_empty(&ref_new); + + /* swap tags */ + __alloc_tag_ref_set(&ref_old, tag_new); + update_page_tag_ref(handle_old, &ref_old); + __alloc_tag_ref_set(&ref_new, tag_old); + update_page_tag_ref(handle_new, &ref_new); + + put_page_tag_ref(handle_old); + put_page_tag_ref(handle_new); +} + +static void shutdown_mem_profiling(bool remove_file) +{ + if (mem_alloc_profiling_enabled()) + static_branch_disable(&mem_alloc_profiling_key); + + if (!mem_profiling_support) + return; + + if (remove_file) + remove_proc_entry(ALLOCINFO_FILE_NAME, NULL); + mem_profiling_support = false; +} + +static void __init procfs_init(void) +{ + if (!mem_profiling_support) + return; + + if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); + shutdown_mem_profiling(false); + } +} + +void __init alloc_tag_sec_init(void) +{ + struct alloc_tag *last_codetag; + + if (!mem_profiling_support) + return; + + if (!static_key_enabled(&mem_profiling_compressed)) + return; + + kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_START(ALLOC_TAG_SECTION_NAME)); + last_codetag = (struct alloc_tag *)kallsyms_lookup_name( + SECTION_STOP(ALLOC_TAG_SECTION_NAME)); + kernel_tags.count = last_codetag - kernel_tags.first_tag; + + /* Check if kernel tags fit into page flags */ + if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) { + shutdown_mem_profiling(false); /* allocinfo file does not exist yet */ + pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n", + kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS); + return; + } + + alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS); + alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1); + pr_debug("Memory allocation profiling compression is using %d page flag bits!\n", + NR_UNUSED_PAGEFLAG_BITS); +} + +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +static struct vm_struct *vm_module_tags; +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +struct alloc_tag_module_section module_tags; + +static inline unsigned long alloc_tag_align(unsigned long val) +{ + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return val; + } + + if (val % sizeof(struct alloc_tag) == 0) + return val; + return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag); +} + +static bool ensure_alignment(unsigned long align, unsigned int *prepend) +{ + if (!static_key_enabled(&mem_profiling_compressed)) { + /* No alignment requirements when we are not indexing the tags */ + return true; + } + + /* + * If alloc_tag size is not a multiple of required alignment, tag + * indexing does not work. 
+ */ + if (!IS_ALIGNED(sizeof(struct alloc_tag), align)) + return false; + + /* Ensure prepend consumes multiple of alloc_tag-sized blocks */ + if (*prepend) + *prepend = alloc_tag_align(*prepend); + + return true; +} + +static inline bool tags_addressable(void) +{ + unsigned long tag_idx_count; + + if (!static_key_enabled(&mem_profiling_compressed)) + return true; /* with page_ext tags are always addressable */ + + tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count + + module_tags.size / sizeof(struct alloc_tag); + + return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS); +} + +static bool needs_section_mem(struct module *mod, unsigned long size) +{ + if (!mem_profiling_support) + return false; + + return size >= sizeof(struct alloc_tag); +} + +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; + + counter = alloc_tag_read(from); + if (counter.bytes) + return from; + from++; + } + + return NULL; +} + +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static int vm_module_tags_populate(void) +{ + unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + + (vm_module_tags->nr_pages << PAGE_SHIFT); + unsigned long new_end = module_tags.start_addr + module_tags.size; + + if (phys_end < new_end) { + struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); + unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); + unsigned long more_pages; + unsigned long nr; + + more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; + nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); + if (nr < more_pages || + vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, + next_page, PAGE_SHIFT) < 0) { + /* Clean up and error out */ + for (int i = 0; i < nr; i++) + __free_page(next_page[i]); + return -ENOMEM; + } + + vm_module_tags->nr_pages += nr; + + /* + * Kasan allocates 1 byte of shadow for every 8 bytes of data. + * When kasan_alloc_module_shadow allocates shadow memory, + * its unit of allocation is a page. + * Therefore, here we need to align to MODULE_ALIGN. 
+ */ + if (old_shadow_end < new_shadow_end) + kasan_alloc_module_shadow((void *)old_shadow_end, + new_shadow_end - old_shadow_end, + GFP_KERNEL); + } + + /* + * Mark the pages as accessible, now that they are mapped. + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). + */ + kasan_unpoison_vmalloc((void *)module_tags.start_addr, + new_end - module_tags.start_addr, + KASAN_VMALLOC_PROT_NORMAL); + + return 0; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. + */ + if (!align) + align = 1; + + if (!ensure_alignment(align, &prepend)) { + shutdown_mem_profiling(true); + pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n", + mod->name, align); + return ERR_PTR(-EINVAL); + } + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; + + if (module_tags.size < offset + size) { + int grow_res; + + module_tags.size = offset + size; + if (mem_alloc_profiling_enabled() && !tags_addressable()) { + shutdown_mem_profiling(true); + pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n", + mod->name, NR_UNUSED_PAGEFLAG_BITS); + } + + grow_res = vm_module_tags_populate(); + if (grow_res) { + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tags in the module %s. 
Memory allocation profiling is disabled!\n", + mod->name); + return ERR_PTR(grow_res); + } + } + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); + struct alloc_tag *tag; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } +release_area: + mas_store(&mas, used ? &unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} + +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) + continue; + + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; + } + mas_unlock(&mas); +} + +static int __init alloc_mod_tags_mem(void) +{ + /* Map space to copy allocation tags */ + vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE); + if (!vm_module_tags) { + pr_err("Failed to map %lu bytes for module allocation tags\n", + MODULE_ALLOC_TAG_VMAP_SIZE); + module_tags.start_addr = 0; + return -ENOMEM; + } + + vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!vm_module_tags->pages) { + free_vm_area(vm_module_tags); + return -ENOMEM; + } + + module_tags.start_addr = (unsigned long)vm_module_tags->addr; + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + /* Ensure the base is alloc_tag aligned when required for indexing */ + module_tags.start_addr = alloc_tag_align(module_tags.start_addr); + + return 0; +} + +static void __init free_mod_tags_mem(void) +{ + int i; + + module_tags.start_addr = 0; + for (i = 0; i < vm_module_tags->nr_pages; i++) + __free_page(vm_module_tags->pages[i]); + kfree(vm_module_tags->pages); + free_vm_area(vm_module_tags); +} + +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ + +/* See: Documentation/mm/allocation-profiling.rst */ +static int __init setup_early_mem_profiling(char *str) +{ + bool compressed = false; + bool enable; + + if (!str || !str[0]) + return -EINVAL; + + if (!strncmp(str, "never", 5)) { + enable = false; + mem_profiling_support = false; + pr_info("Memory allocation profiling is disabled!\n"); + } else { + char *token = strsep(&str, ","); + + if (kstrtobool(token, &enable)) + return -EINVAL; + + if (str) { + + if (strcmp(str, "compressed")) + return -EINVAL; + + compressed = true; + } + mem_profiling_support = true; + pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n", + compressed ? "with" : "without", enable ? 
"on" : "off"); + } + + if (enable != mem_alloc_profiling_enabled()) { + if (enable) + static_branch_enable(&mem_alloc_profiling_key); + else + static_branch_disable(&mem_alloc_profiling_key); + } + if (compressed != static_key_enabled(&mem_profiling_compressed)) { + if (compressed) + static_branch_enable(&mem_profiling_compressed); + else + static_branch_disable(&mem_profiling_compressed); + } + + return 0; +} +early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling); + +static __init bool need_page_alloc_tagging(void) +{ + if (static_key_enabled(&mem_profiling_compressed)) + return false; + + return mem_profiling_support; +} + +static __init void init_page_alloc_tagging(void) +{ +} + +struct page_ext_operations page_alloc_tagging_ops = { + .size = sizeof(union codetag_ref), + .need = need_page_alloc_tagging, + .init = init_page_alloc_tagging, +}; +EXPORT_SYMBOL(page_alloc_tagging_ops); + +#ifdef CONFIG_SYSCTL +static struct ctl_table memory_allocation_profiling_sysctls[] = { + { + .procname = "mem_profiling", + .data = &mem_alloc_profiling_key, +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + .mode = 0444, +#else + .mode = 0644, +#endif + .proc_handler = proc_do_static_key, + }, +}; + +static void __init sysctl_init(void) +{ + if (!mem_profiling_support) + memory_allocation_profiling_sysctls[0].mode = 0444; + + register_sysctl_init("vm", memory_allocation_profiling_sysctls); +} +#else /* CONFIG_SYSCTL */ +static inline void sysctl_init(void) {} +#endif /* CONFIG_SYSCTL */ + +static int __init alloc_tag_init(void) +{ + const struct codetag_type_desc desc = { + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif + }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; + + alloc_tag_cttype = codetag_register_type(&desc); + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); + return PTR_ERR(alloc_tag_cttype); + } + + sysctl_init(); + procfs_init(); + + return 0; +} +module_init(alloc_tag_init); diff --git a/lib/asn1_encoder.c b/lib/asn1_encoder.c index 0fd3c454a468..92f35aae13b1 100644 --- a/lib/asn1_encoder.c +++ b/lib/asn1_encoder.c @@ -449,4 +449,5 @@ asn1_encode_boolean(unsigned char *data, const unsigned char *end_data, } EXPORT_SYMBOL_GPL(asn1_encode_boolean); +MODULE_DESCRIPTION("Simple encoder primitives for ASN.1 BER/DER/CER"); MODULE_LICENSE("GPL"); diff --git a/lib/atomic64.c b/lib/atomic64.c index caf895789a1e..1a72bba36d24 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -25,15 +25,15 @@ * Ensure each lock is in a separate cacheline. */ static union { - raw_spinlock_t lock; + arch_spinlock_t lock; char pad[L1_CACHE_BYTES]; } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { [0 ... 
(NR_LOCKS - 1)] = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), + .lock = __ARCH_SPIN_LOCK_UNLOCKED, }, }; -static inline raw_spinlock_t *lock_addr(const atomic64_t *v) +static inline arch_spinlock_t *lock_addr(const atomic64_t *v) { unsigned long addr = (unsigned long) v; @@ -45,12 +45,14 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v) s64 generic_atomic64_read(const atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); s64 val; - raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); val = v->counter; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); return val; } EXPORT_SYMBOL(generic_atomic64_read); @@ -58,11 +60,13 @@ EXPORT_SYMBOL(generic_atomic64_read); void generic_atomic64_set(atomic64_t *v, s64 i) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); - raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); v->counter = i; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); } EXPORT_SYMBOL(generic_atomic64_set); @@ -70,11 +74,13 @@ EXPORT_SYMBOL(generic_atomic64_set); void generic_atomic64_##op(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + arch_spinlock_t *lock = lock_addr(v); \ \ - raw_spin_lock_irqsave(lock, flags); \ + local_irq_save(flags); \ + arch_spin_lock(lock); \ v->counter c_op a; \ - raw_spin_unlock_irqrestore(lock, flags); \ + arch_spin_unlock(lock); \ + local_irq_restore(flags); \ } \ EXPORT_SYMBOL(generic_atomic64_##op); @@ -82,12 +88,14 @@ EXPORT_SYMBOL(generic_atomic64_##op); s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + arch_spinlock_t *lock = lock_addr(v); \ s64 val; \ \ - raw_spin_lock_irqsave(lock, flags); \ + local_irq_save(flags); \ + arch_spin_lock(lock); \ val = (v->counter c_op a); \ - raw_spin_unlock_irqrestore(lock, flags); \ + arch_spin_unlock(lock); \ + local_irq_restore(flags); \ return val; \ } \ EXPORT_SYMBOL(generic_atomic64_##op##_return); @@ -96,13 +104,15 @@ EXPORT_SYMBOL(generic_atomic64_##op##_return); s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + arch_spinlock_t *lock = lock_addr(v); \ s64 val; \ \ - raw_spin_lock_irqsave(lock, flags); \ + local_irq_save(flags); \ + arch_spin_lock(lock); \ val = v->counter; \ v->counter c_op a; \ - raw_spin_unlock_irqrestore(lock, flags); \ + arch_spin_unlock(lock); \ + local_irq_restore(flags); \ return val; \ } \ EXPORT_SYMBOL(generic_atomic64_fetch_##op); @@ -131,14 +141,16 @@ ATOMIC64_OPS(xor, ^=) s64 generic_atomic64_dec_if_positive(atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); s64 val; - raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); val = v->counter - 1; if (val >= 0) v->counter = val; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); return val; } EXPORT_SYMBOL(generic_atomic64_dec_if_positive); @@ -146,14 +158,16 @@ EXPORT_SYMBOL(generic_atomic64_dec_if_positive); s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); s64 val; - 
raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); val = v->counter; if (val == o) v->counter = n; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); return val; } EXPORT_SYMBOL(generic_atomic64_cmpxchg); @@ -161,13 +175,15 @@ EXPORT_SYMBOL(generic_atomic64_cmpxchg); s64 generic_atomic64_xchg(atomic64_t *v, s64 new) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); s64 val; - raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); val = v->counter; v->counter = new; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); return val; } EXPORT_SYMBOL(generic_atomic64_xchg); @@ -175,14 +191,16 @@ EXPORT_SYMBOL(generic_atomic64_xchg); s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + arch_spinlock_t *lock = lock_addr(v); s64 val; - raw_spin_lock_irqsave(lock, flags); + local_irq_save(flags); + arch_spin_lock(lock); val = v->counter; if (val != u) v->counter += a; - raw_spin_unlock_irqrestore(lock, flags); + arch_spin_unlock(lock); + local_irq_restore(flags); return val; } diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index d9d170238165..759ea1783cc5 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -273,4 +273,5 @@ static __exit void test_atomics_exit(void) {} module_init(test_atomics_init); module_exit(test_atomics_exit); +MODULE_DESCRIPTION("Testsuite for atomic64_t functions"); MODULE_LICENSE("GPL"); diff --git a/lib/bcd.c b/lib/bcd.c index 7e4750b6e801..c5e79ba9cd7b 100644 --- a/lib/bcd.c +++ b/lib/bcd.c @@ -10,6 +10,8 @@ EXPORT_SYMBOL(_bcd2bin); unsigned char _bin2bcd(unsigned val) { - return ((val / 10) << 4) + val % 10; + const unsigned int t = (val * 103) >> 10; + + return (t << 4) | (val - t * 10); } EXPORT_SYMBOL(_bin2bcd); diff --git a/lib/bch.c b/lib/bch.c index 5f71fd76eca8..1c0cb07cdfeb 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -479,11 +479,8 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows, /* find suitable row for elimination */ for (r = p; r < m; r++) { if (rows[r] & mask) { - if (r != p) { - tmp = rows[r]; - rows[r] = rows[p]; - rows[p] = tmp; - } + if (r != p) + swap(rows[r], rows[p]); rem = r+1; break; } @@ -799,21 +796,14 @@ static void gf_poly_div(struct bch_control *bch, struct gf_poly *a, static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a, struct gf_poly *b) { - struct gf_poly *tmp; - dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b)); - if (a->deg < b->deg) { - tmp = b; - b = a; - a = tmp; - } + if (a->deg < b->deg) + swap(a, b); while (b->deg > 0) { gf_poly_mod(bch, a, b, NULL); - tmp = b; - b = a; - a = tmp; + swap(a, b); } dbg("%s\n", gf_poly_str(a)); diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c index 1473d8b4bf0f..5ccd86f61896 100644 --- a/lib/bitfield_kunit.c +++ b/lib/bitfield_kunit.c @@ -151,4 +151,5 @@ static struct kunit_suite bitfields_test_suite = { kunit_test_suites(&bitfields_test_suite); MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); +MODULE_DESCRIPTION("Test cases for bitfield helpers"); MODULE_LICENSE("GPL"); diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 97f8911ea339..81f29c29f47b 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -4,8 +4,16 @@ * Masami Hiramatsu <mhiramat@kernel.org> */ -#ifdef __KERNEL__ +/* + * NOTE: This is only for tools/bootconfig, because 
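/*
 * A minimal sketch, not part of the patch above: the new _bin2bcd() replaces
 * the divide and modulo by 10 with a multiply-and-shift.  For the valid BCD
 * inputs 0..99 the approximation (val * 103) >> 10 equals val / 10 exactly,
 * so the packed result is unchanged.  A quick user-space check:
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int val;

	for (val = 0; val < 100; val++) {
		unsigned int t = (val * 103) >> 10;	/* approximates val / 10 */

		assert(t == val / 10);
		assert(((t << 4) | (val - t * 10)) ==
		       (((val / 10) << 4) + (val % 10)));
	}
	printf("bin2bcd multiply-shift check passed\n");
	return 0;
}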
tools/bootconfig will + * run the parser sanity test. + * This does NOT mean lib/bootconfig.c is available in the user space. + * However, if you change this file, please make sure the tools/bootconfig + * has no issue on building and running. + */ #include <linux/bootconfig.h> + +#ifdef __KERNEL__ #include <linux/bug.h> #include <linux/ctype.h> #include <linux/errno.h> @@ -24,16 +32,6 @@ const char * __init xbc_get_embedded_bootconfig(size_t *size) return (*size) ? embedded_bootconfig_data : NULL; } #endif - -#else /* !__KERNEL__ */ -/* - * NOTE: This is only for tools/bootconfig, because tools/bootconfig will - * run the parser sanity test. - * This does NOT mean lib/bootconfig.c is available in the user space. - * However, if you change this file, please make sure the tools/bootconfig - * has no issue on building and running. - */ -#include <linux/bootconfig.h> #endif /* diff --git a/lib/btree.c b/lib/btree.c index 49420cae3a83..bb81d3393ac5 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -43,7 +43,6 @@ #include <linux/slab.h> #include <linux/module.h> -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NODESIZE MAX(L1_CACHE_BYTES, 128) struct btree_geo { diff --git a/lib/build_OID_registry b/lib/build_OID_registry index d7fc32ea8ac2..8267e8d71338 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -8,6 +8,7 @@ # use strict; +use Cwd qw(abs_path); my @names = (); my @oids = (); @@ -17,6 +18,8 @@ if ($#ARGV != 1) { exit(2); } +my $abs_srctree = abs_path($ENV{'srctree'}); + # # Open the file to read from # @@ -35,7 +38,9 @@ close IN_FILE || die; # open C_FILE, ">$ARGV[1]" or die; print C_FILE "/*\n"; -print C_FILE " * Automatically generated by ", $0, ". Do not edit\n"; +my $scriptname = $0; +$scriptname =~ s#^\Q$abs_srctree/\E##; +print C_FILE " * Automatically generated by ", $scriptname, ". 
Do not edit\n"; print C_FILE " */\n"; # diff --git a/lib/buildid.c b/lib/buildid.c index 898301b49eb6..c4b0f376fb34 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -5,143 +5,310 @@ #include <linux/elf.h> #include <linux/kernel.h> #include <linux/pagemap.h> +#include <linux/secretmem.h> #define BUILD_ID 3 +#define MAX_PHDR_CNT 256 + +struct freader { + void *buf; + u32 buf_sz; + int err; + union { + struct { + struct file *file; + struct folio *folio; + void *addr; + loff_t folio_off; + bool may_fault; + }; + struct { + const char *data; + u64 data_sz; + }; + }; +}; + +static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault) +{ + memset(r, 0, sizeof(*r)); + r->buf = buf; + r->buf_sz = buf_sz; + r->file = file; + r->may_fault = may_fault; +} + +static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) +{ + memset(r, 0, sizeof(*r)); + r->data = data; + r->data_sz = data_sz; +} + +static void freader_put_folio(struct freader *r) +{ + if (!r->folio) + return; + kunmap_local(r->addr); + folio_put(r->folio); + r->folio = NULL; +} + +static int freader_get_folio(struct freader *r, loff_t file_off) +{ + /* check if we can just reuse current folio */ + if (r->folio && file_off >= r->folio_off && + file_off < r->folio_off + folio_size(r->folio)) + return 0; + + freader_put_folio(r); + + /* reject secretmem folios created with memfd_secret() */ + if (secretmem_mapping(r->file->f_mapping)) + return -EFAULT; + + r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); + + /* if sleeping is allowed, wait for the page, if necessary */ + if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { + filemap_invalidate_lock_shared(r->file->f_mapping); + r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, + NULL, r->file); + filemap_invalidate_unlock_shared(r->file->f_mapping); + } + + if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { + if (!IS_ERR(r->folio)) + folio_put(r->folio); + r->folio = NULL; + return -EFAULT; + } + + r->folio_off = folio_pos(r->folio); + r->addr = kmap_local_folio(r->folio, 0); + + return 0; +} + +static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) +{ + size_t folio_sz; + + /* provided internal temporary buffer should be sized correctly */ + if (WARN_ON(r->buf && sz > r->buf_sz)) { + r->err = -E2BIG; + return NULL; + } + + if (unlikely(file_off + sz < file_off)) { + r->err = -EOVERFLOW; + return NULL; + } + + /* working with memory buffer is much more straightforward */ + if (!r->buf) { + if (file_off + sz > r->data_sz) { + r->err = -ERANGE; + return NULL; + } + return r->data + file_off; + } + + /* fetch or reuse folio for given file offset */ + r->err = freader_get_folio(r, file_off); + if (r->err) + return NULL; + + /* if requested data is crossing folio boundaries, we have to copy + * everything into our local buffer to keep a simple linear memory + * access interface + */ + folio_sz = folio_size(r->folio); + if (file_off + sz > r->folio_off + folio_sz) { + int part_sz = r->folio_off + folio_sz - file_off; + + /* copy the part that resides in the current folio */ + memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz); + + /* fetch next folio */ + r->err = freader_get_folio(r, r->folio_off + folio_sz); + if (r->err) + return NULL; + + /* copy the rest of requested data */ + memcpy(r->buf + part_sz, r->addr, sz - part_sz); + + return r->buf; + } + + /* if data fits in a single folio, just return direct 
pointer */ + return r->addr + (file_off - r->folio_off); +} + +static void freader_cleanup(struct freader *r) +{ + if (!r->buf) + return; /* non-file-backed mode */ + + freader_put_folio(r); +} + /* * Parse build id from the note segment. This logic can be shared between * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are * identical. */ -static int parse_build_id_buf(unsigned char *build_id, - __u32 *size, - const void *note_start, - Elf32_Word note_size) +static int parse_build_id(struct freader *r, unsigned char *build_id, __u32 *size, + loff_t note_off, Elf32_Word note_size) { - Elf32_Word note_offs = 0, new_offs; + const char note_name[] = "GNU"; + const size_t note_name_sz = sizeof(note_name); + u32 build_id_off, new_off, note_end, name_sz, desc_sz; + const Elf32_Nhdr *nhdr; + const char *data; + + if (check_add_overflow(note_off, note_size, ¬e_end)) + return -EINVAL; - while (note_offs + sizeof(Elf32_Nhdr) < note_size) { - Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); + while (note_end - note_off > sizeof(Elf32_Nhdr) + note_name_sz) { + nhdr = freader_fetch(r, note_off, sizeof(Elf32_Nhdr) + note_name_sz); + if (!nhdr) + return r->err; + + name_sz = READ_ONCE(nhdr->n_namesz); + desc_sz = READ_ONCE(nhdr->n_descsz); + + new_off = note_off + sizeof(Elf32_Nhdr); + if (check_add_overflow(new_off, ALIGN(name_sz, 4), &new_off) || + check_add_overflow(new_off, ALIGN(desc_sz, 4), &new_off) || + new_off > note_end) + break; if (nhdr->n_type == BUILD_ID && - nhdr->n_namesz == sizeof("GNU") && - !strcmp((char *)(nhdr + 1), "GNU") && - nhdr->n_descsz > 0 && - nhdr->n_descsz <= BUILD_ID_SIZE_MAX) { - memcpy(build_id, - note_start + note_offs + - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), - nhdr->n_descsz); - memset(build_id + nhdr->n_descsz, 0, - BUILD_ID_SIZE_MAX - nhdr->n_descsz); + name_sz == note_name_sz && + memcmp(nhdr + 1, note_name, note_name_sz) == 0 && + desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) { + build_id_off = note_off + sizeof(Elf32_Nhdr) + ALIGN(note_name_sz, 4); + + /* freader_fetch() will invalidate nhdr pointer */ + data = freader_fetch(r, build_id_off, desc_sz); + if (!data) + return r->err; + + memcpy(build_id, data, desc_sz); + memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz); if (size) - *size = nhdr->n_descsz; + *size = desc_sz; return 0; } - new_offs = note_offs + sizeof(Elf32_Nhdr) + - ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); - if (new_offs <= note_offs) /* overflow */ - break; - note_offs = new_offs; + + note_off = new_off; } return -EINVAL; } -static inline int parse_build_id(const void *page_addr, - unsigned char *build_id, - __u32 *size, - const void *note_start, - Elf32_Word note_size) +/* Parse build ID from 32-bit ELF */ +static int get_build_id_32(struct freader *r, unsigned char *build_id, __u32 *size) { - /* check for overflow */ - if (note_start < page_addr || note_start + note_size < note_start) - return -EINVAL; + const Elf32_Ehdr *ehdr; + const Elf32_Phdr *phdr; + __u32 phnum, phoff, i; - /* only supports note that fits in the first page */ - if (note_start + note_size > page_addr + PAGE_SIZE) - return -EINVAL; + ehdr = freader_fetch(r, 0, sizeof(Elf32_Ehdr)); + if (!ehdr) + return r->err; - return parse_build_id_buf(build_id, size, note_start, note_size); -} + /* subsequent freader_fetch() calls invalidate pointers, so remember locally */ + phnum = READ_ONCE(ehdr->e_phnum); + phoff = READ_ONCE(ehdr->e_phoff); -/* Parse build ID from 32-bit ELF */ -static int get_build_id_32(const void *page_addr, 
unsigned char *build_id, - __u32 *size) -{ - Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; - Elf32_Phdr *phdr; - int i; + /* set upper bound on amount of segments (phdrs) we iterate */ + if (phnum > MAX_PHDR_CNT) + phnum = MAX_PHDR_CNT; - /* only supports phdr that fits in one page */ - if (ehdr->e_phnum > - (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) + /* check that phoff is not large enough to cause an overflow */ + if (phoff + phnum * sizeof(Elf32_Phdr) < phoff) return -EINVAL; - phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); + for (i = 0; i < phnum; ++i) { + phdr = freader_fetch(r, phoff + i * sizeof(Elf32_Phdr), sizeof(Elf32_Phdr)); + if (!phdr) + return r->err; - for (i = 0; i < ehdr->e_phnum; ++i) { - if (phdr[i].p_type == PT_NOTE && - !parse_build_id(page_addr, build_id, size, - page_addr + phdr[i].p_offset, - phdr[i].p_filesz)) + if (phdr->p_type == PT_NOTE && + !parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset), + READ_ONCE(phdr->p_filesz))) return 0; } return -EINVAL; } /* Parse build ID from 64-bit ELF */ -static int get_build_id_64(const void *page_addr, unsigned char *build_id, - __u32 *size) +static int get_build_id_64(struct freader *r, unsigned char *build_id, __u32 *size) { - Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; - Elf64_Phdr *phdr; - int i; + const Elf64_Ehdr *ehdr; + const Elf64_Phdr *phdr; + __u32 phnum, i; + __u64 phoff; + + ehdr = freader_fetch(r, 0, sizeof(Elf64_Ehdr)); + if (!ehdr) + return r->err; + + /* subsequent freader_fetch() calls invalidate pointers, so remember locally */ + phnum = READ_ONCE(ehdr->e_phnum); + phoff = READ_ONCE(ehdr->e_phoff); + + /* set upper bound on amount of segments (phdrs) we iterate */ + if (phnum > MAX_PHDR_CNT) + phnum = MAX_PHDR_CNT; - /* only supports phdr that fits in one page */ - if (ehdr->e_phnum > - (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) + /* check that phoff is not large enough to cause an overflow */ + if (phoff + phnum * sizeof(Elf64_Phdr) < phoff) return -EINVAL; - phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); + for (i = 0; i < phnum; ++i) { + phdr = freader_fetch(r, phoff + i * sizeof(Elf64_Phdr), sizeof(Elf64_Phdr)); + if (!phdr) + return r->err; - for (i = 0; i < ehdr->e_phnum; ++i) { - if (phdr[i].p_type == PT_NOTE && - !parse_build_id(page_addr, build_id, size, - page_addr + phdr[i].p_offset, - phdr[i].p_filesz)) + if (phdr->p_type == PT_NOTE && + !parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset), + READ_ONCE(phdr->p_filesz))) return 0; } + return -EINVAL; } -/* - * Parse build ID of ELF file mapped to vma - * @vma: vma object - * @build_id: buffer to store build id, at least BUILD_ID_SIZE long - * @size: returns actual build id size in case of success - * - * Return: 0 on success, -EINVAL otherwise - */ -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, - __u32 *size) +/* enough for Elf64_Ehdr, Elf64_Phdr, and all the smaller requests */ +#define MAX_FREADER_BUF_SZ 64 + +static int __build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, + __u32 *size, bool may_fault) { - Elf32_Ehdr *ehdr; - struct page *page; - void *page_addr; + const Elf32_Ehdr *ehdr; + struct freader r; + char buf[MAX_FREADER_BUF_SZ]; int ret; /* only works for page backed storage */ if (!vma->vm_file) return -EINVAL; - page = find_get_page(vma->vm_file->f_mapping, 0); - if (!page) - return -EFAULT; /* page not mapped */ + freader_init_from_file(&r, buf, sizeof(buf), vma->vm_file, may_fault); + + /* fetch first 18 bytes of ELF header for 
checks */ + ehdr = freader_fetch(&r, 0, offsetofend(Elf32_Ehdr, e_type)); + if (!ehdr) { + ret = r.err; + goto out; + } ret = -EINVAL; - page_addr = kmap_local_page(page); - ehdr = (Elf32_Ehdr *)page_addr; /* compare magic x7f "ELF" */ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) @@ -152,15 +319,46 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, goto out; if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) - ret = get_build_id_32(page_addr, build_id, size); + ret = get_build_id_32(&r, build_id, size); else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) - ret = get_build_id_64(page_addr, build_id, size); + ret = get_build_id_64(&r, build_id, size); out: - kunmap_local(page_addr); - put_page(page); + freader_cleanup(&r); return ret; } +/* + * Parse build ID of ELF file mapped to vma + * @vma: vma object + * @build_id: buffer to store build id, at least BUILD_ID_SIZE long + * @size: returns actual build id size in case of success + * + * Assumes no page fault can be taken, so if relevant portions of ELF file are + * not already paged in, fetching of build ID fails. + * + * Return: 0 on success; negative error, otherwise + */ +int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size) +{ + return __build_id_parse(vma, build_id, size, false /* !may_fault */); +} + +/* + * Parse build ID of ELF file mapped to VMA + * @vma: vma object + * @build_id: buffer to store build id, at least BUILD_ID_SIZE long + * @size: returns actual build id size in case of success + * + * Assumes faultable context and can cause page faults to bring in file data + * into page cache. + * + * Return: 0 on success; negative error, otherwise + */ +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size) +{ + return __build_id_parse(vma, build_id, size, true /* may_fault */); +} + /** * build_id_parse_buf - Get build ID from a buffer * @buf: ELF note section(s) to parse @@ -171,7 +369,15 @@ out: */ int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size) { - return parse_build_id_buf(build_id, NULL, buf, buf_size); + struct freader r; + int err; + + freader_init_from_mem(&r, buf, buf_size); + + err = parse_build_id(&r, build_id, NULL, 0, buf_size); + + freader_cleanup(&r); + return err; } #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) @@ -182,8 +388,8 @@ unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init; */ void __init init_vmlinux_build_id(void) { - extern const void __start_notes __weak; - extern const void __stop_notes __weak; + extern const void __start_notes; + extern const void __stop_notes; unsigned int size = &__stop_notes - &__start_notes; build_id_parse_buf(&__start_notes, vmlinux_build_id, size); diff --git a/lib/checksum.c b/lib/checksum.c index 6860d6b05a17..025ba546e1ec 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -34,15 +34,6 @@ #include <asm/byteorder.h> #ifndef do_csum -static inline unsigned short from32to16(unsigned int x) -{ - /* add up 16-bit and 16-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. 
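/*
 * A minimal sketch, not part of the patch above (hypothetical kernel-side
 * caller): build_id_parse_buf() walks ELF notes in a plain memory buffer, so
 * a hand-built NT_GNU_BUILD_ID note can be parsed without any file or VMA.
 */
#include <linux/buildid.h>
#include <linux/elf.h>
#include <linux/printk.h>

static void demo_parse_note_buf(void)
{
	struct {
		Elf32_Nhdr nhdr;
		char name[4];		/* "GNU" plus NUL, 4-byte aligned */
		u8 desc[20];		/* made-up SHA-1 sized build id */
	} note = {
		.nhdr = { .n_namesz = 4, .n_descsz = 20, .n_type = NT_GNU_BUILD_ID },
		.name = "GNU",
		.desc = { 0xde, 0xad, 0xbe, 0xef },
	};
	unsigned char id[BUILD_ID_SIZE_MAX];

	if (!build_id_parse_buf(&note, id, sizeof(note)))
		pr_info("build id starts with %02x%02x\n", id[0], id[1]);
}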
*/ - x = (x & 0xffff) + (x >> 16); - return x; -} - static unsigned int do_csum(const unsigned char *buff, int len) { int odd; @@ -90,7 +81,7 @@ static unsigned int do_csum(const unsigned char *buff, int len) #else result += (*buff << 8); #endif - result = from32to16(result); + result = csum_from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 404dba36bae3..be04aa42125c 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -468,12 +468,9 @@ static __wsum to_wsum(u32 x) static void assert_setup_correct(struct kunit *test) { - CHECK_EQ(sizeof(random_buf) / sizeof(random_buf[0]), MAX_LEN); - CHECK_EQ(sizeof(expected_results) / sizeof(expected_results[0]), - MAX_LEN); - CHECK_EQ(sizeof(init_sums_no_overflow) / - sizeof(init_sums_no_overflow[0]), - MAX_LEN); + CHECK_EQ(ARRAY_SIZE(random_buf), MAX_LEN); + CHECK_EQ(ARRAY_SIZE(expected_results), MAX_LEN); + CHECK_EQ(ARRAY_SIZE(init_sums_no_overflow), MAX_LEN); } /* @@ -639,4 +636,5 @@ static struct kunit_suite checksum_test_suite = { kunit_test_suites(&checksum_test_suite); MODULE_AUTHOR("Noah Goldstein <goldstein.w.n@gmail.com>"); +MODULE_DESCRIPTION("Test cases csum_* APIs"); MODULE_LICENSE("GPL"); diff --git a/lib/closure.c b/lib/closure.c index c16540552d61..2bfe7d2a0048 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -13,14 +13,25 @@ #include <linux/seq_file.h> #include <linux/sched/debug.h> -static inline void closure_put_after_sub(struct closure *cl, int flags) +static inline void closure_put_after_sub_checks(int flags) { int r = flags & CLOSURE_REMAINING_MASK; - BUG_ON(flags & CLOSURE_GUARD_MASK); - BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); + if (WARN(flags & CLOSURE_GUARD_MASK, + "closure has guard bits set: %x (%u)", + flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) + r &= ~CLOSURE_GUARD_MASK; + + WARN(!r && (flags & ~CLOSURE_DESTRUCTOR), + "closure ref hit 0 with incorrect flags set: %x (%u)", + flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); +} + +static inline void closure_put_after_sub(struct closure *cl, int flags) +{ + closure_put_after_sub_checks(flags); - if (!r) { + if (!(flags & CLOSURE_REMAINING_MASK)) { smp_acquire__after_ctrl_dep(); cl->closure_get_happened = false; @@ -139,6 +150,78 @@ void __sched __closure_sync(struct closure *cl) } EXPORT_SYMBOL(__closure_sync); +/* + * closure_return_sync - finish running a closure, synchronously (i.e. waiting + * for outstanding get()s to finish) and returning once closure refcount is 0. + * + * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent + * closure_get_not_zero() calls waill fail. 
+ */ +void __sched closure_return_sync(struct closure *cl) +{ + struct closure_syncer s = { .task = current }; + + cl->s = &s; + set_closure_fn(cl, closure_sync_fn, NULL); + + unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR, + &cl->remaining); + + closure_put_after_sub_checks(flags); + + if (unlikely(flags & CLOSURE_REMAINING_MASK)) { + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (s.done) + break; + schedule(); + } + + __set_current_state(TASK_RUNNING); + } + + if (cl->parent) + closure_put(cl->parent); +} +EXPORT_SYMBOL(closure_return_sync); + +int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout) +{ + struct closure_syncer s = { .task = current }; + int ret = 0; + + cl->s = &s; + continue_at(cl, closure_sync_fn, NULL); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (s.done) + break; + if (!timeout) { + /* + * Carefully undo the continue_at() - but only if it + * hasn't completed, i.e. the final closure_put() hasn't + * happened yet: + */ + unsigned old, new, v = atomic_read(&cl->remaining); + do { + old = v; + if (!old || (old & CLOSURE_RUNNING)) + goto success; + + new = old + CLOSURE_REMAINING_INITIALIZER; + } while ((v = atomic_cmpxchg(&cl->remaining, old, new)) != old); + ret = -ETIME; + } + + timeout = schedule_timeout(timeout); + } +success: + __set_current_state(TASK_RUNNING); + return ret; +} +EXPORT_SYMBOL(__closure_sync_timeout); + #ifdef CONFIG_DEBUG_CLOSURES static LIST_HEAD(closure_list); @@ -161,6 +244,9 @@ void closure_debug_destroy(struct closure *cl) { unsigned long flags; + if (cl->magic == CLOSURE_MAGIC_STACK) + return; + BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); cl->magic = CLOSURE_MAGIC_DEAD; @@ -192,7 +278,7 @@ static int debug_show(struct seq_file *f, void *data) seq_printf(f, " W %pS\n", (void *) cl->waiting_on); - seq_puts(f, "\n"); + seq_putc(f, '\n'); } spin_unlock_irq(&closure_list_lock); diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index 705b82736be0..c1602f797637 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -153,4 +153,5 @@ static struct kunit_suite cmdline_test_suite = { }; kunit_test_suite(cmdline_test_suite); +MODULE_DESCRIPTION("Test cases for API provided by cmdline.c"); MODULE_LICENSE("GPL"); diff --git a/lib/codetag.c b/lib/codetag.c new file mode 100644 index 000000000000..42aadd6c1454 --- /dev/null +++ b/lib/codetag.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/codetag.h> +#include <linux/idr.h> +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/seq_buf.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +struct codetag_type { + struct list_head link; + unsigned int count; + struct idr mod_idr; + struct rw_semaphore mod_lock; /* protects mod_idr */ + struct codetag_type_desc desc; +}; + +struct codetag_range { + struct codetag *start; + struct codetag *stop; +}; + +struct codetag_module { + struct module *mod; + struct codetag_range range; +}; + +static DEFINE_MUTEX(codetag_lock); +static LIST_HEAD(codetag_types); + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock) +{ + if (lock) + down_read(&cttype->mod_lock); + else + up_read(&cttype->mod_lock); +} + +bool codetag_trylock_module_list(struct codetag_type *cttype) +{ + return down_read_trylock(&cttype->mod_lock) != 0; +} + +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype) +{ + struct codetag_iterator iter = { + .cttype = cttype, + .cmod = NULL, + .mod_id = 0, + .ct = NULL, + }; 
+ + return iter; +} + +static inline struct codetag *get_first_module_ct(struct codetag_module *cmod) +{ + return cmod->range.start < cmod->range.stop ? cmod->range.start : NULL; +} + +static inline +struct codetag *get_next_module_ct(struct codetag_iterator *iter) +{ + struct codetag *res = (struct codetag *) + ((char *)iter->ct + iter->cttype->desc.tag_size); + + return res < iter->cmod->range.stop ? res : NULL; +} + +struct codetag *codetag_next_ct(struct codetag_iterator *iter) +{ + struct codetag_type *cttype = iter->cttype; + struct codetag_module *cmod; + struct codetag *ct; + + lockdep_assert_held(&cttype->mod_lock); + + if (unlikely(idr_is_empty(&cttype->mod_idr))) + return NULL; + + ct = NULL; + while (true) { + cmod = idr_find(&cttype->mod_idr, iter->mod_id); + + /* If module was removed move to the next one */ + if (!cmod) + cmod = idr_get_next_ul(&cttype->mod_idr, + &iter->mod_id); + + /* Exit if no more modules */ + if (!cmod) + break; + + if (cmod != iter->cmod) { + iter->cmod = cmod; + ct = get_first_module_ct(cmod); + } else + ct = get_next_module_ct(iter); + + if (ct) + break; + + iter->mod_id++; + } + + iter->ct = ct; + return ct; +} + +void codetag_to_text(struct seq_buf *out, struct codetag *ct) +{ + if (ct->modname) + seq_buf_printf(out, "%s:%u [%s] func:%s", + ct->filename, ct->lineno, + ct->modname, ct->function); + else + seq_buf_printf(out, "%s:%u func:%s", + ct->filename, ct->lineno, ct->function); +} + +static inline size_t range_size(const struct codetag_type *cttype, + const struct codetag_range *range) +{ + return ((char *)range->stop - (char *)range->start) / + cttype->desc.tag_size; +} + +static void *get_symbol(struct module *mod, const char *prefix, const char *name) +{ + DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN); + const char *buf; + void *ret; + + seq_buf_printf(&sb, "%s%s", prefix, name); + if (seq_buf_has_overflowed(&sb)) + return NULL; + + buf = seq_buf_str(&sb); + preempt_disable(); + ret = mod ? + (void *)find_kallsyms_symbol_value(mod, buf) : + (void *)kallsyms_lookup_name(buf); + preempt_enable(); + + return ret; +} + +static struct codetag_range get_section_range(struct module *mod, + const char *section) +{ + return (struct codetag_range) { + get_symbol(mod, CODETAG_SECTION_START_PREFIX, section), + get_symbol(mod, CODETAG_SECTION_STOP_PREFIX, section), + }; +} + +static const char *get_mod_name(__maybe_unused struct module *mod) +{ +#ifdef CONFIG_MODULES + if (mod) + return mod->name; +#endif + return "(built-in)"; +} + +static int codetag_module_init(struct codetag_type *cttype, struct module *mod) +{ + struct codetag_range range; + struct codetag_module *cmod; + int err; + + range = get_section_range(mod, cttype->desc.section); + if (!range.start || !range.stop) { + pr_warn("Failed to load code tags of type %s from the module %s\n", + cttype->desc.section, get_mod_name(mod)); + return -EINVAL; + } + + /* Ignore empty ranges */ + if (range.start == range.stop) + return 0; + + BUG_ON(range.start > range.stop); + + cmod = kmalloc(sizeof(*cmod), GFP_KERNEL); + if (unlikely(!cmod)) + return -ENOMEM; + + cmod->mod = mod; + cmod->range = range; + + down_write(&cttype->mod_lock); + err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); + if (err >= 0) { + cttype->count += range_size(cttype, &range); + if (cttype->desc.module_load) + cttype->desc.module_load(cttype, cmod); + } + up_write(&cttype->mod_lock); + + if (err < 0) { + kfree(cmod); + return err; + } + + return 0; +} + +#ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." 
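/*
 * A minimal sketch, not part of the patch above (hypothetical caller): the
 * iterator API defined earlier in this file is enough to dump every code tag
 * registered for a given codetag_type.
 */
static void dump_codetags(struct codetag_type *cttype)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	DECLARE_SEQ_BUF(sb, 256);

	codetag_lock_module_list(cttype, true);		/* hold mod_lock across iteration */
	iter = codetag_get_ct_iter(cttype);
	while ((ct = codetag_next_ct(&iter))) {
		seq_buf_clear(&sb);
		codetag_to_text(&sb, ct);		/* "file:line [module] func:name" */
		pr_info("%s\n", seq_buf_str(&sb));
	}
	codetag_lock_module_list(cttype, false);
}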
+ +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_load_module(struct module *mod) +{ + struct codetag_type *cttype; + + if (!mod) + return; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) + codetag_module_init(cttype, mod); + mutex_unlock(&codetag_lock); +} + +void codetag_unload_module(struct module *mod) +{ + struct codetag_type *cttype; + + if (!mod) + return; + + /* await any module's kfree_rcu() operations to complete */ + kvfree_rcu_barrier(); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + struct codetag_module *found = NULL; + struct codetag_module *cmod; + unsigned long mod_id, tmp; + + down_write(&cttype->mod_lock); + idr_for_each_entry_ul(&cttype->mod_idr, cmod, tmp, mod_id) { + if (cmod->mod && cmod->mod == mod) { + found = cmod; + break; + } + } + if (found) { + if (cttype->desc.module_unload) + cttype->desc.module_unload(cttype, cmod); + + cttype->count -= range_size(cttype, &cmod->range); + idr_remove(&cttype->mod_idr, mod_id); + kfree(cmod); + } + up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); + } + mutex_unlock(&codetag_lock); +} +#endif /* CONFIG_MODULES */ + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc) +{ + struct codetag_type *cttype; + int err; + + BUG_ON(desc->tag_size <= 
0); + + cttype = kzalloc(sizeof(*cttype), GFP_KERNEL); + if (unlikely(!cttype)) + return ERR_PTR(-ENOMEM); + + cttype->desc = *desc; + idr_init(&cttype->mod_idr); + init_rwsem(&cttype->mod_lock); + + err = codetag_module_init(cttype, NULL); + if (unlikely(err)) { + kfree(cttype); + return ERR_PTR(err); + } + + mutex_lock(&codetag_lock); + list_add_tail(&cttype->link, &codetag_types); + mutex_unlock(&codetag_lock); + + return cttype; +} diff --git a/lib/cpumask.c b/lib/cpumask.c index e77ee9d46f71..57274ba8b6d9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -83,10 +83,7 @@ EXPORT_SYMBOL(alloc_cpumask_var_node); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); - if (!*mask) - panic("%s: Failed to allocate %u bytes\n", __func__, - cpumask_size()); + *mask = memblock_alloc_or_panic(cpumask_size(), SMP_CACHE_BYTES); } /** diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index a105e6369efc..6b62a6bdd50e 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -152,4 +152,5 @@ static struct kunit_suite test_cpumask_suite = { }; kunit_test_suite(test_cpumask_suite); +MODULE_DESCRIPTION("KUnit tests for cpumask"); MODULE_LICENSE("GPL"); diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 1ed2ed487097..311c2ab829f1 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -9,123 +9,57 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> -#include <linux/err.h> -#include <linux/init.h> -#include <crypto/hash.h> -#include <crypto/algapi.h> -#include <linux/static_key.h> -#include <linux/notifier.h> -static struct crypto_shash __rcu *crct10dif_tfm; -static DEFINE_STATIC_KEY_TRUE(crct10dif_fallback); -static DEFINE_MUTEX(crc_t10dif_mutex); -static struct work_struct crct10dif_rehash_work; - -static int crc_t10dif_notify(struct notifier_block *self, unsigned long val, void *data) -{ - struct crypto_alg *alg = data; - - if (val != CRYPTO_MSG_ALG_LOADED || - strcmp(alg->cra_name, CRC_T10DIF_STRING)) - return NOTIFY_DONE; - - schedule_work(&crct10dif_rehash_work); - return NOTIFY_OK; -} - -static void crc_t10dif_rehash(struct work_struct *work) -{ - struct crypto_shash *new, *old; - - mutex_lock(&crc_t10dif_mutex); - old = rcu_dereference_protected(crct10dif_tfm, - lockdep_is_held(&crc_t10dif_mutex)); - new = crypto_alloc_shash(CRC_T10DIF_STRING, 0, 0); - if (IS_ERR(new)) { - mutex_unlock(&crc_t10dif_mutex); - return; - } - rcu_assign_pointer(crct10dif_tfm, new); - mutex_unlock(&crc_t10dif_mutex); - - if (old) { - synchronize_rcu(); - crypto_free_shash(old); - } else { - static_branch_disable(&crct10dif_fallback); - } -} - -static struct notifier_block crc_t10dif_nb = { - .notifier_call = crc_t10dif_notify, +/* + * Table generated using the following polynomial: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 
0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 }; -__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) -{ - struct { - struct shash_desc shash; - __u16 crc; - } desc; - int err; - - if (static_branch_unlikely(&crct10dif_fallback)) - return crc_t10dif_generic(crc, buffer, len); - - rcu_read_lock(); - desc.shash.tfm = rcu_dereference(crct10dif_tfm); - desc.crc = crc; - err = crypto_shash_update(&desc.shash, buffer, len); - rcu_read_unlock(); - - BUG_ON(err); - - return desc.crc; -} -EXPORT_SYMBOL(crc_t10dif_update); - -__u16 crc_t10dif(const unsigned char *buffer, size_t len) -{ - return crc_t10dif_update(0, buffer, len); -} -EXPORT_SYMBOL(crc_t10dif); - -static int __init crc_t10dif_mod_init(void) -{ - INIT_WORK(&crct10dif_rehash_work, crc_t10dif_rehash); - crypto_register_notifier(&crc_t10dif_nb); - crc_t10dif_rehash(&crct10dif_rehash_work); - return 0; -} - -static void __exit crc_t10dif_mod_fini(void) -{ - crypto_unregister_notifier(&crc_t10dif_nb); - cancel_work_sync(&crct10dif_rehash_work); - crypto_free_shash(rcu_dereference_protected(crct10dif_tfm, 1)); -} - -module_init(crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_fini); - -static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp) +u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len) { - struct crypto_shash *tfm; - int len; + size_t i; - if (static_branch_unlikely(&crct10dif_fallback)) - return sprintf(buffer, "fallback\n"); + for (i = 0; i < len; i++) + crc = (crc << 8) ^ t10_dif_crc_table[(crc >> 8) ^ p[i]]; - rcu_read_lock(); - tfm = rcu_dereference(crct10dif_tfm); - len = snprintf(buffer, PAGE_SIZE, "%s\n", - crypto_shash_driver_name(tfm)); - rcu_read_unlock(); - - return len; + return crc; } +EXPORT_SYMBOL(crc_t10dif_generic); -module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0444); - -MODULE_DESCRIPTION("T10 DIF CRC calculation (library API)"); +MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crct10dif"); diff --git a/lib/crc32.c b/lib/crc32.c index 5649847d0a8d..ede6131f66fc 
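/*
 * A minimal sketch, not part of the patch above: crc_t10dif_generic() carries
 * the running CRC in its first argument, so a buffer can be checksummed in
 * pieces and the result matches a single pass over the whole buffer.
 */
static u16 crc_t10dif_two_pass(const u8 *buf, size_t len)
{
	size_t half = len / 2;
	u16 crc = crc_t10dif_generic(0, buf, half);		/* first half, seed 0 */

	return crc_t10dif_generic(crc, buf + half, len - half);	/* chain the rest */
}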
100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -30,20 +30,6 @@ #include <linux/crc32poly.h> #include <linux/module.h> #include <linux/types.h> -#include <linux/sched.h> -#include "crc32defs.h" - -#if CRC_LE_BITS > 8 -# define tole(x) ((__force u32) cpu_to_le32(x)) -#else -# define tole(x) (x) -#endif - -#if CRC_BE_BITS > 8 -# define tobe(x) ((__force u32) cpu_to_be32(x)) -#else -# define tobe(x) (x) -#endif #include "crc32table.h" @@ -51,162 +37,21 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 - -/* implements slicing-by-4 or slicing-by-8 algorithm */ -static inline u32 __pure -crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { -# ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) -# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ - t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) -# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ - t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) -# else -# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) -# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ - t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) -# define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ - t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) -# endif - const u32 *b; - size_t rem_len; -# ifdef CONFIG_X86 - size_t i; -# endif - const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; -# if CRC_LE_BITS != 32 - const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; -# endif - u32 q; - - /* Align it */ - if (unlikely((long)buf & 3 && len)) { - do { - DO_CRC(*buf++); - } while ((--len) && ((long)buf)&3); - } - -# if CRC_LE_BITS == 32 - rem_len = len & 3; - len = len >> 2; -# else - rem_len = len & 7; - len = len >> 3; -# endif - - b = (const u32 *)buf; -# ifdef CONFIG_X86 - --b; - for (i = 0; i < len; i++) { -# else - for (--b; len; --len) { -# endif - q = crc ^ *++b; /* use pre increment for speed */ -# if CRC_LE_BITS == 32 - crc = DO_CRC4; -# else - crc = DO_CRC8; - q = *++b; - crc ^= DO_CRC4; -# endif - } - len = rem_len; - /* And the last few bytes */ - if (len) { - u8 *p = (u8 *)(b + 1) - 1; -# ifdef CONFIG_X86 - for (i = 0; i < len; i++) - DO_CRC(*++p); /* use pre increment for speed */ -# else - do { - DO_CRC(*++p); /* use pre increment for speed */ - } while (--len); -# endif - } + while (len--) + crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; return crc; -#undef DO_CRC -#undef DO_CRC4 -#undef DO_CRC8 } -#endif - +EXPORT_SYMBOL(crc32_le_base); -/** - * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II - * CRC32/CRC32C - * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other - * uses, or the previous crc32/crc32c value if computing incrementally. - * @p: pointer to buffer over which CRC32/CRC32C is run - * @len: length of buffer @p - * @tab: little-endian Ethernet table - * @polynomial: CRC32/CRC32c LE polynomial - */ -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, const u32 (*tab)[256], - u32 polynomial) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { -#if CRC_LE_BITS == 1 - int i; - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 
polynomial : 0); - } -# elif CRC_LE_BITS == 2 - while (len--) { - crc ^= *p++; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - crc = (crc >> 2) ^ tab[0][crc & 3]; - } -# elif CRC_LE_BITS == 4 - while (len--) { - crc ^= *p++; - crc = (crc >> 4) ^ tab[0][crc & 15]; - crc = (crc >> 4) ^ tab[0][crc & 15]; - } -# elif CRC_LE_BITS == 8 - /* aka Sarwate algorithm */ - while (len--) { - crc ^= *p++; - crc = (crc >> 8) ^ tab[0][crc & 255]; - } -# else - crc = (__force u32) __cpu_to_le32(crc); - crc = crc32_body(crc, p, len, tab); - crc = __le32_to_cpu((__force __le32)crc); -#endif + while (len--) + crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; return crc; } - -#if CRC_LE_BITS == 1 -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); -} -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); -} -#else -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); -} -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); -} -#endif -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(__crc32c_le); - -u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); -u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); -u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); +EXPORT_SYMBOL(crc32c_le_base); /* * This multiplies the polynomials x and y modulo the given modulus. @@ -281,64 +126,10 @@ u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) EXPORT_SYMBOL(crc32_le_shift); EXPORT_SYMBOL(__crc32c_le_shift); -/** - * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 - * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for - * other uses, or the previous crc32 value if computing incrementally. - * @p: pointer to buffer over which CRC32 is run - * @len: length of buffer @p - * @tab: big-endian Ethernet table - * @polynomial: CRC32 BE polynomial - */ -static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, - size_t len, const u32 (*tab)[256], - u32 polynomial) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { -#if CRC_BE_BITS == 1 - int i; - while (len--) { - crc ^= *p++ << 24; - for (i = 0; i < 8; i++) - crc = - (crc << 1) ^ ((crc & 0x80000000) ? 
polynomial : - 0); - } -# elif CRC_BE_BITS == 2 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - crc = (crc << 2) ^ tab[0][crc >> 30]; - } -# elif CRC_BE_BITS == 4 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 4) ^ tab[0][crc >> 28]; - crc = (crc << 4) ^ tab[0][crc >> 28]; - } -# elif CRC_BE_BITS == 8 - while (len--) { - crc ^= *p++ << 24; - crc = (crc << 8) ^ tab[0][crc >> 24]; - } -# else - crc = (__force u32) __cpu_to_be32(crc); - crc = crc32_body(crc, p, len, tab); - crc = __be32_to_cpu((__force __be32)crc); -# endif + while (len--) + crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; return crc; } - -#if CRC_BE_BITS == 1 -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); -} -#else -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); -} -#endif -EXPORT_SYMBOL(crc32_be); +EXPORT_SYMBOL(crc32_be_base); diff --git a/lib/crc32defs.h b/lib/crc32defs.h deleted file mode 100644 index 0c8fb5923e7e..000000000000 --- a/lib/crc32defs.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Try to choose an implementation variant via Kconfig */ -#ifdef CONFIG_CRC32_SLICEBY8 -# define CRC_LE_BITS 64 -# define CRC_BE_BITS 64 -#endif -#ifdef CONFIG_CRC32_SLICEBY4 -# define CRC_LE_BITS 32 -# define CRC_BE_BITS 32 -#endif -#ifdef CONFIG_CRC32_SARWATE -# define CRC_LE_BITS 8 -# define CRC_BE_BITS 8 -#endif -#ifdef CONFIG_CRC32_BIT -# define CRC_LE_BITS 1 -# define CRC_BE_BITS 1 -#endif - -/* - * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. - * For less performance-sensitive, use 4 or 8 to save table size. - * For larger systems choose same as CPU architecture as default. - * This works well on X86_64, SPARC64 systems. This may require some - * elaboration after experiments with other architectures. - */ -#ifndef CRC_LE_BITS -# ifdef CONFIG_64BIT -# define CRC_LE_BITS 64 -# else -# define CRC_LE_BITS 32 -# endif -#endif -#ifndef CRC_BE_BITS -# ifdef CONFIG_64BIT -# define CRC_BE_BITS 64 -# else -# define CRC_BE_BITS 32 -# endif -#endif - -/* - * Little-endian CRC computation. Used with serial bit streams sent - * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. - */ -#if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ - CRC_LE_BITS & CRC_LE_BITS-1 -# error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}" -#endif - -/* - * Big-endian CRC computation. Used with serial bit streams sent - * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. - */ -#if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ - CRC_BE_BITS & CRC_BE_BITS-1 -# error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}" -#endif diff --git a/lib/crc32test.c b/lib/crc32test.c deleted file mode 100644 index 9b4af79412c4..000000000000 --- a/lib/crc32test.c +++ /dev/null @@ -1,852 +0,0 @@ -/* - * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin - * cleaned up code to current version of sparse and added the slicing-by-8 - * algorithm to the closely similar existing slicing-by-4 algorithm. - * - * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> - * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! - * Code was from the public domain, copyright abandoned. 
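/*
 * A minimal sketch, not part of the patch above: the byte-at-a-time *_base()
 * helpers kept in crc32.c still produce the standard check values for the
 * string "123456789" when seeded with ~0 and inverted at the end.
 */
static void __init crc32_base_check(void)
{
	static const u8 check[] = "123456789";
	size_t len = sizeof(check) - 1;

	WARN_ON((crc32_le_base(~0, check, len) ^ ~0) != 0xcbf43926);	/* CRC-32 */
	WARN_ON((crc32c_le_base(~0, check, len) ^ ~0) != 0xe3069283);	/* CRC-32C */
}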
Code was - * subsequently included in the kernel, thus was re-licensed under the - * GNU GPL v2. - * - * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> - * Same crc32 function was used in 5 other places in the kernel. - * I made one version, and deleted the others. - * There are various incantations of crc32(). Some use a seed of 0 or ~0. - * Some xor at the end with ~0. The generic crc32() function takes - * seed as an argument, and doesn't xor at the end. Then individual - * users can do whatever they need. - * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. - * fs/jffs2 uses seed 0, doesn't xor with ~0. - * fs/partitions/efi.c uses seed ~0, xor's with ~0. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include <linux/crc32.h> -#include <linux/module.h> -#include <linux/sched.h> - -#include "crc32defs.h" - -/* 4096 random bytes */ -static u8 const __aligned(8) test_buf[] __initconst = -{ - 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, - 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, - 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, - 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, - 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, - 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, - 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, - 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, - 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, - 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, - 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, - 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, - 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, - 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, - 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, - 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, - 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, - 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, - 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, - 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, - 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, - 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, - 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, - 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, - 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, - 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, - 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, - 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, - 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, - 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, - 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22, - 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, - 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, - 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, - 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, - 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, - 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, - 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, - 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, - 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, - 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, - 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, - 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, - 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, - 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, - 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, - 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, - 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, - 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, - 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, - 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, - 
0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, - 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, - 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, - 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, - 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, - 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, - 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, - 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, - 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, - 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, - 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, - 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, - 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, - 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, - 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, - 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, - 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, - 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, - 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, - 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1, - 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, - 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, - 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, - 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, - 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, - 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, - 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, - 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, - 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, - 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, - 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, - 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, - 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, - 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, - 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, - 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, - 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, - 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, - 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, - 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, - 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, - 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, - 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, - 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, - 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, - 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, - 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, - 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, - 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, - 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, - 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, - 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, - 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, - 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, - 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, - 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, - 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, - 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, - 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, - 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c, - 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, - 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, - 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, - 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, - 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, - 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, - 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, - 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, - 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, - 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, - 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, - 
0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, - 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, - 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, - 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, - 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, - 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, - 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, - 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, - 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, - 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, - 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, - 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, - 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, - 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, - 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, - 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, - 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, - 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, - 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, - 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, - 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, - 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, - 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, - 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, - 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, - 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, - 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, - 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, - 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d, - 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, - 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, - 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, - 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, - 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, - 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, - 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, - 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, - 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, - 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, - 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, - 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, - 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, - 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, - 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, - 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, - 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, - 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, - 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, - 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, - 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, - 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, - 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, - 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, - 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, - 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, - 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, - 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, - 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, - 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, - 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, - 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, - 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, - 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, - 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, - 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, - 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, - 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, - 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, - 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3, - 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, - 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, - 
0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, - 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, - 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, - 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, - 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, - 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, - 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, - 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, - 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, - 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, - 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, - 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, - 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, - 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, - 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, - 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, - 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, - 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, - 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, - 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, - 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, - 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, - 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, - 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, - 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, - 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, - 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, - 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, - 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, - 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, - 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, - 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, - 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, - 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, - 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, - 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, - 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, - 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90, - 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, - 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, - 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, - 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, - 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, - 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, - 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, - 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, - 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, - 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, - 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, - 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, - 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, - 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, - 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, - 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, - 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, - 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, - 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, - 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, - 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, - 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, - 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, - 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, - 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, - 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, - 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, - 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, - 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, - 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, - 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, - 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, - 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, - 
0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, - 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, - 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, - 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, - 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, - 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, - 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f, - 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, - 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, - 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, - 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, - 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, - 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, - 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, - 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, - 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, - 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, - 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, - 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, - 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, - 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, - 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, - 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, - 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, - 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, - 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, - 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, - 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, - 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, - 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, - 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, - 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, - 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, - 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, - 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, - 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, - 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, - 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, - 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, - 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, - 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, - 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, - 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, - 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, - 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, - 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, - 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59, - 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, - 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, - 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, - 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, - 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, - 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, - 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, - 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, - 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, - 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, - 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, - 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, - 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, - 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, - 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, - 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, - 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, - 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, - 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, - 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, - 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, - 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, - 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, - 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, - 
0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, - 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, - 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, - 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, - 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, - 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, - 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, - 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, - 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, - 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, - 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, - 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, - 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, - 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, - 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, - 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06, - 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, - 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, - 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, - 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, - 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, - 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, - 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, - 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, - 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, - 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, - 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, - 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, - 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, - 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, - 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, - 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, - 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, - 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, - 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, - 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, - 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, - 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, - 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, - 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, - 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, - 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, - 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, - 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, - 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, - 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, - 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, - 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, - 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, - 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, - 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, - 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, - 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, - 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, - 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, - 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d, - 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, - 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, - 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, - 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, - 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, - 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, - 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, - 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, - 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, - 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, - 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, - 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, - 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, - 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, - 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, - 
0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, - 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, - 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, - 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, - 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, - 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, - 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, - 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, - 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, - 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, - 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, - 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, - 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, - 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, - 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, - 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, - 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, - 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, - 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, - 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, - 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, - 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, - 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, - 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, - 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39, - 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, - 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, - 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, - 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, - 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, - 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, - 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, - 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, - 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, - 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, - 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, - 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, - 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, - 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, - 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, - 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, - 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, - 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, - 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, - 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, - 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, - 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, - 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, - 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, - 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, - 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, - 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, - 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, - 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, - 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, - 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, - 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, - 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, - 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, - 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, - 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, - 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, - 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, - 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, - 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4, - 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, - 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, - 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, - 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, - 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, - 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, - 
0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, - 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, - 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, - 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, - 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, - 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, - 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, - 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, - 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, - 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, - 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, - 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, - 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, - 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, - 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, - 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, - 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, - 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, - 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, - 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, - 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, - 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, - 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, - 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, - 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, - 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, - 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, - 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, - 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, - 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, - 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, - 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, - 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, - 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc, - 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, -}; - -/* 100 test cases */ -static struct crc_test { - u32 crc; /* random starting crc */ - u32 start; /* random 6 bit offset in buf */ - u32 length; /* random 11 bit length of test */ - u32 crc_le; /* expected crc32_le result */ - u32 crc_be; /* expected crc32_be result */ - u32 crc32c_le; /* expected crc32c_le result */ -} const test[] __initconst = -{ - {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, - {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, - {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8}, - {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a}, - {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152}, - {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7}, - {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc}, - {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2}, - {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d}, - {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5}, - {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f}, - {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a}, - {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8}, - {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa}, - {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801}, - {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597}, - {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b}, - {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a}, - {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d}, - 
{0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982}, - {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18}, - {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7}, - {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3}, - {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5}, - {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59}, - {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e}, - {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603}, - {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060}, - {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072}, - {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59}, - {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213}, - {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41}, - {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5}, - {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2}, - {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a}, - {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2}, - {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b}, - {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1}, - {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba}, - {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62}, - {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe}, - {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988}, - {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be}, - {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546}, - {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc}, - {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69}, - {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a}, - {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2}, - {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd}, - {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb}, - {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b}, - {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76}, - {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339}, - {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9}, - {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548}, - {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de}, - {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59}, - {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b}, - {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73}, - {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11}, - {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c}, - {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b}, - {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb}, - {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc}, - {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196}, - {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 
0x3f02926d, 0x5747218a}, - {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de}, - {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9}, - {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0}, - {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60}, - {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6}, - {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c}, - {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73}, - {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7}, - {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf}, - {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83}, - {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867}, - {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211}, - {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2}, - {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874}, - {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f}, - {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff}, - {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95}, - {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd}, - {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06}, - {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784}, - {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616}, - {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c}, - {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c}, - {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d}, - {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d}, - {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272}, - {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb}, - {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b}, - {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e}, - {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23}, - {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672}, - {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86}, - {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd}, - {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48}, -}; - -#include <linux/time.h> - -static int __init crc32c_test(void) -{ - int i; - int errors = 0; - int bytes = 0; - u64 nsec; - unsigned long flags; - - /* keep static to prevent cache warming code from - * getting eliminated by the compiler */ - static u32 crc; - - /* pre-warm the cache */ - for (i = 0; i < 100; i++) { - bytes += test[i].length; - - crc ^= __crc32c_le(test[i].crc, test_buf + - test[i].start, test[i].length); - } - - /* reduce OS noise */ - local_irq_save(flags); - - nsec = ktime_get_ns(); - for (i = 0; i < 100; i++) { - if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - } - nsec = ktime_get_ns() - nsec; - - local_irq_restore(flags); - - pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); - - if (errors) - pr_warn("crc32c: %d self tests failed\n", errors); - else { - pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n", - bytes, nsec); - 
} - - return 0; -} - -static int __init crc32c_combine_test(void) -{ - int i, j; - int errors = 0, runs = 0; - - for (i = 0; i < 10; i++) { - u32 crc_full; - - crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start, - test[i].length); - for (j = 0; j <= test[i].length; ++j) { - u32 crc1, crc2; - u32 len1 = j, len2 = test[i].length - j; - - crc1 = __crc32c_le(test[i].crc, test_buf + - test[i].start, len1); - crc2 = __crc32c_le(0, test_buf + test[i].start + - len1, len2); - - if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) && - crc_full == test[i].crc32c_le)) - errors++; - runs++; - cond_resched(); - } - } - - if (errors) - pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs); - else - pr_info("crc32c_combine: %d self tests passed\n", runs); - - return 0; -} - -static int __init crc32_test(void) -{ - int i; - int errors = 0; - int bytes = 0; - u64 nsec; - unsigned long flags; - - /* keep static to prevent cache warming code from - * getting eliminated by the compiler */ - static u32 crc; - - /* pre-warm the cache */ - for (i = 0; i < 100; i++) { - bytes += 2*test[i].length; - - crc ^= crc32_le(test[i].crc, test_buf + - test[i].start, test[i].length); - - crc ^= crc32_be(test[i].crc, test_buf + - test[i].start, test[i].length); - } - - /* reduce OS noise */ - local_irq_save(flags); - - nsec = ktime_get_ns(); - for (i = 0; i < 100; i++) { - if (test[i].crc_le != crc32_le(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - - if (test[i].crc_be != crc32_be(test[i].crc, test_buf + - test[i].start, test[i].length)) - errors++; - } - nsec = ktime_get_ns() - nsec; - - local_irq_restore(flags); - - pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", - CRC_LE_BITS, CRC_BE_BITS); - - if (errors) - pr_warn("crc32: %d self tests failed\n", errors); - else { - pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n", - bytes, nsec); - } - - return 0; -} - -static int __init crc32_combine_test(void) -{ - int i, j; - int errors = 0, runs = 0; - - for (i = 0; i < 10; i++) { - u32 crc_full; - - crc_full = crc32_le(test[i].crc, test_buf + test[i].start, - test[i].length); - for (j = 0; j <= test[i].length; ++j) { - u32 crc1, crc2; - u32 len1 = j, len2 = test[i].length - j; - - crc1 = crc32_le(test[i].crc, test_buf + - test[i].start, len1); - crc2 = crc32_le(0, test_buf + test[i].start + - len1, len2); - - if (!(crc_full == crc32_le_combine(crc1, crc2, len2) && - crc_full == test[i].crc_le)) - errors++; - runs++; - cond_resched(); - } - } - - if (errors) - pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs); - else - pr_info("crc32_combine: %d self tests passed\n", runs); - - return 0; -} - -static int __init crc32test_init(void) -{ - crc32_test(); - crc32c_test(); - - crc32_combine_test(); - crc32c_combine_test(); - - return 0; -} - -static void __exit crc32_exit(void) -{ -} - -module_init(crc32test_init); -module_exit(crc32_exit); - -MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); -MODULE_DESCRIPTION("CRC32 selftest"); -MODULE_LICENSE("GPL"); diff --git a/lib/crc_kunit.c b/lib/crc_kunit.c new file mode 100644 index 000000000000..6a61d4b5fd45 --- /dev/null +++ b/lib/crc_kunit.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Unit tests and benchmarks for the CRC library functions + * + * Copyright 2024 Google LLC + * + * Author: Eric Biggers <ebiggers@google.com> + */ +#include <kunit/test.h> +#include <linux/crc16.h> +#include <linux/crc-t10dif.h> +#include <linux/crc32.h> +#include <linux/crc32c.h> 
+#include <linux/crc64.h> +#include <linux/prandom.h> +#include <linux/vmalloc.h> + +#define CRC_KUNIT_SEED 42 +#define CRC_KUNIT_MAX_LEN 16384 +#define CRC_KUNIT_NUM_TEST_ITERS 1000 + +static struct rnd_state rng; +static u8 *test_buffer; +static size_t test_buflen; + +/** + * struct crc_variant - describes a CRC variant + * @bits: Number of bits in the CRC, 1 <= @bits <= 64. + * @le: true if it's a "little endian" CRC (reversed mapping between bits and + * polynomial coefficients in each byte), false if it's a "big endian" CRC + * (natural mapping between bits and polynomial coefficients in each byte) + * @poly: The generator polynomial with the highest-order term omitted. + * Bit-reversed if @le is true. + * @func: The function to compute a CRC. The type signature uses u64 so that it + * can fit any CRC up to CRC-64. + * @combine_func: Optional function to combine two CRCs. + */ +struct crc_variant { + int bits; + bool le; + u64 poly; + u64 (*func)(u64 crc, const u8 *p, size_t len); + u64 (*combine_func)(u64 crc1, u64 crc2, size_t len2); +}; + +static u32 rand32(void) +{ + return prandom_u32_state(&rng); +} + +static u64 rand64(void) +{ + u32 n = rand32(); + + return ((u64)n << 32) | rand32(); +} + +static u64 crc_mask(const struct crc_variant *v) +{ + return (u64)-1 >> (64 - v->bits); +} + +/* Reference implementation of any CRC variant */ +static u64 crc_ref(const struct crc_variant *v, + u64 crc, const u8 *p, size_t len) +{ + size_t i, j; + + for (i = 0; i < len; i++) { + for (j = 0; j < 8; j++) { + if (v->le) { + crc ^= (p[i] >> j) & 1; + crc = (crc >> 1) ^ ((crc & 1) ? v->poly : 0); + } else { + crc ^= (u64)((p[i] >> (7 - j)) & 1) << + (v->bits - 1); + if (crc & (1ULL << (v->bits - 1))) + crc = ((crc << 1) ^ v->poly) & + crc_mask(v); + else + crc <<= 1; + } + } + } + return crc; +} + +static int crc_suite_init(struct kunit_suite *suite) +{ + /* + * Allocate the test buffer using vmalloc() with a page-aligned length + * so that it is immediately followed by a guard page. This allows + * buffer overreads to be detected, even in assembly code. + */ + test_buflen = round_up(CRC_KUNIT_MAX_LEN, PAGE_SIZE); + test_buffer = vmalloc(test_buflen); + if (!test_buffer) + return -ENOMEM; + + prandom_seed_state(&rng, CRC_KUNIT_SEED); + prandom_bytes_state(&rng, test_buffer, test_buflen); + return 0; +} + +static void crc_suite_exit(struct kunit_suite *suite) +{ + vfree(test_buffer); + test_buffer = NULL; +} + +/* Generate a random initial CRC. */ +static u64 generate_random_initial_crc(const struct crc_variant *v) +{ + switch (rand32() % 4) { + case 0: + return 0; + case 1: + return crc_mask(v); /* All 1 bits */ + default: + return rand64() & crc_mask(v); + } +} + +/* Generate a random length, preferring small lengths. */ +static size_t generate_random_length(size_t max_length) +{ + size_t len; + + switch (rand32() % 3) { + case 0: + len = rand32() % 128; + break; + case 1: + len = rand32() % 3072; + break; + default: + len = rand32(); + break; + } + return len % (max_length + 1); +} + +/* Test that v->func gives the same CRCs as a reference implementation. */ +static void crc_main_test(struct kunit *test, const struct crc_variant *v) +{ + size_t i; + + for (i = 0; i < CRC_KUNIT_NUM_TEST_ITERS; i++) { + u64 init_crc, expected_crc, actual_crc; + size_t len, offset; + bool nosimd; + + init_crc = generate_random_initial_crc(v); + len = generate_random_length(CRC_KUNIT_MAX_LEN); + + /* Generate a random offset. 
*/ + if (rand32() % 2 == 0) { + /* Use a random alignment mod 64 */ + offset = rand32() % 64; + offset = min(offset, CRC_KUNIT_MAX_LEN - len); + } else { + /* Go up to the guard page, to catch buffer overreads */ + offset = test_buflen - len; + } + + if (rand32() % 8 == 0) + /* Refresh the data occasionally. */ + prandom_bytes_state(&rng, &test_buffer[offset], len); + + nosimd = rand32() % 8 == 0; + + /* + * Compute the CRC, and verify that it equals the CRC computed + * by a simple bit-at-a-time reference implementation. + */ + expected_crc = crc_ref(v, init_crc, &test_buffer[offset], len); + if (nosimd) + local_irq_disable(); + actual_crc = v->func(init_crc, &test_buffer[offset], len); + if (nosimd) + local_irq_enable(); + KUNIT_EXPECT_EQ_MSG(test, expected_crc, actual_crc, + "Wrong result with len=%zu offset=%zu nosimd=%d", + len, offset, nosimd); + } +} + +/* Test that CRC(concat(A, B)) == combine_CRCs(CRC(A), CRC(B), len(B)). */ +static void crc_combine_test(struct kunit *test, const struct crc_variant *v) +{ + int i; + + for (i = 0; i < 100; i++) { + u64 init_crc = generate_random_initial_crc(v); + size_t len1 = generate_random_length(CRC_KUNIT_MAX_LEN); + size_t len2 = generate_random_length(CRC_KUNIT_MAX_LEN - len1); + u64 crc1, crc2, expected_crc, actual_crc; + + prandom_bytes_state(&rng, test_buffer, len1 + len2); + crc1 = v->func(init_crc, test_buffer, len1); + crc2 = v->func(0, &test_buffer[len1], len2); + expected_crc = v->func(init_crc, test_buffer, len1 + len2); + actual_crc = v->combine_func(crc1, crc2, len2); + KUNIT_EXPECT_EQ_MSG(test, expected_crc, actual_crc, + "CRC combination gave wrong result with len1=%zu len2=%zu\n", + len1, len2); + } +} + +static void crc_test(struct kunit *test, const struct crc_variant *v) +{ + crc_main_test(test, v); + if (v->combine_func) + crc_combine_test(test, v); +} + +static __always_inline void +crc_benchmark(struct kunit *test, + u64 (*crc_func)(u64 crc, const u8 *p, size_t len)) +{ + static const size_t lens_to_test[] = { + 1, 16, 64, 127, 128, 200, 256, 511, 512, 1024, 3173, 4096, 16384, + }; + size_t len, i, j, num_iters; + /* + * Some of the CRC library functions are marked as __pure, so use + * volatile to ensure that all calls are really made as intended. 
+ */ + volatile u64 crc = 0; + u64 t; + + if (!IS_ENABLED(CONFIG_CRC_BENCHMARK)) + kunit_skip(test, "not enabled"); + + /* warm-up */ + for (i = 0; i < 10000000; i += CRC_KUNIT_MAX_LEN) + crc = crc_func(crc, test_buffer, CRC_KUNIT_MAX_LEN); + + for (i = 0; i < ARRAY_SIZE(lens_to_test); i++) { + len = lens_to_test[i]; + KUNIT_ASSERT_LE(test, len, CRC_KUNIT_MAX_LEN); + num_iters = 10000000 / (len + 128); + preempt_disable(); + t = ktime_get_ns(); + for (j = 0; j < num_iters; j++) + crc = crc_func(crc, test_buffer, len); + t = ktime_get_ns() - t; + preempt_enable(); + kunit_info(test, "len=%zu: %llu MB/s\n", + len, div64_u64((u64)len * num_iters * 1000, t)); + } +} + +/* crc16 */ + +static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc16(crc, p, len); +} + +static const struct crc_variant crc_variant_crc16 = { + .bits = 16, + .le = true, + .poly = 0xa001, + .func = crc16_wrapper, +}; + +static void crc16_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc16); +} + +static void crc16_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc16_wrapper); +} + +/* crc_t10dif */ + +static u64 crc_t10dif_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc_t10dif_update(crc, p, len); +} + +static const struct crc_variant crc_variant_crc_t10dif = { + .bits = 16, + .le = false, + .poly = 0x8bb7, + .func = crc_t10dif_wrapper, +}; + +static void crc_t10dif_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc_t10dif); +} + +static void crc_t10dif_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc_t10dif_wrapper); +} + +/* crc32_le */ + +static u64 crc32_le_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc32_le(crc, p, len); +} + +static u64 crc32_le_combine_wrapper(u64 crc1, u64 crc2, size_t len2) +{ + return crc32_le_combine(crc1, crc2, len2); +} + +static const struct crc_variant crc_variant_crc32_le = { + .bits = 32, + .le = true, + .poly = 0xedb88320, + .func = crc32_le_wrapper, + .combine_func = crc32_le_combine_wrapper, +}; + +static void crc32_le_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc32_le); +} + +static void crc32_le_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc32_le_wrapper); +} + +/* crc32_be */ + +static u64 crc32_be_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc32_be(crc, p, len); +} + +static const struct crc_variant crc_variant_crc32_be = { + .bits = 32, + .le = false, + .poly = 0x04c11db7, + .func = crc32_be_wrapper, +}; + +static void crc32_be_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc32_be); +} + +static void crc32_be_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc32_be_wrapper); +} + +/* crc32c */ + +static u64 crc32c_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc32c(crc, p, len); +} + +static u64 crc32c_combine_wrapper(u64 crc1, u64 crc2, size_t len2) +{ + return __crc32c_le_combine(crc1, crc2, len2); +} + +static const struct crc_variant crc_variant_crc32c = { + .bits = 32, + .le = true, + .poly = 0x82f63b78, + .func = crc32c_wrapper, + .combine_func = crc32c_combine_wrapper, +}; + +static void crc32c_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc32c); +} + +static void crc32c_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc32c_wrapper); +} + +/* crc64_be */ + +static u64 crc64_be_wrapper(u64 crc, const u8 *p, size_t len) +{ + return crc64_be(crc, p, len); +} + +static const struct crc_variant crc_variant_crc64_be = { + .bits = 64, + .le = false, + .poly = 0x42f0e1eba9ea3693, + .func = crc64_be_wrapper, +}; + 
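[Note on the reference recurrence used by this suite] The crc_ref() helper added earlier in this file computes every CRC one bit at a time directly from the generator polynomial, and the crc_variant tables above give the polynomials it is checked against (for example 0xedb88320 for crc32_le). As a standalone illustration of that same "little endian" (reflected) recurrence, here is a minimal userspace sketch — not part of the patch itself — that reproduces the conventional CRC-32 check value 0xCBF43926 for the ASCII string "123456789". The init/final XOR with ~0 is part of the conventional CRC-32 presentation assumed for this example, not something crc32_le() or the kunit reference does internally.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Bit-at-a-time reflected CRC-32 over polynomial 0xEDB88320: xor the
	 * next message byte into the low bits of the CRC, then for each of its
	 * 8 bits shift right, folding the polynomial back in whenever a 1 bit
	 * falls off the low end.  This is algebraically the same recurrence as
	 * the @le branch of crc_ref(), which pulls in one bit per iteration.
	 */
	static uint32_t crc32_le_bitwise(uint32_t crc, const uint8_t *p, size_t len)
	{
		for (size_t i = 0; i < len; i++) {
			crc ^= p[i];
			for (int j = 0; j < 8; j++)
				crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
		}
		return crc;
	}

	int main(void)
	{
		const uint8_t msg[] = "123456789";
		/* Conventional CRC-32: initialize with ~0 and invert the result. */
		uint32_t crc = ~crc32_le_bitwise(~0u, msg, sizeof(msg) - 1);

		printf("crc32(\"123456789\") = 0x%08x (expected 0xcbf43926)\n", crc);
		return crc == 0xcbf43926 ? 0 : 1;
	}

The big-endian case (crc32_be, crc_t10dif, crc64_be) works analogously on the top bit of the CRC register, which is why crc_ref() only needs the @le flag and the polynomial to cover every variant in the suite.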
+static void crc64_be_test(struct kunit *test) +{ + crc_test(test, &crc_variant_crc64_be); +} + +static void crc64_be_benchmark(struct kunit *test) +{ + crc_benchmark(test, crc64_be_wrapper); +} + +static struct kunit_case crc_test_cases[] = { + KUNIT_CASE(crc16_test), + KUNIT_CASE(crc16_benchmark), + KUNIT_CASE(crc_t10dif_test), + KUNIT_CASE(crc_t10dif_benchmark), + KUNIT_CASE(crc32_le_test), + KUNIT_CASE(crc32_le_benchmark), + KUNIT_CASE(crc32_be_test), + KUNIT_CASE(crc32_be_benchmark), + KUNIT_CASE(crc32c_test), + KUNIT_CASE(crc32c_benchmark), + KUNIT_CASE(crc64_be_test), + KUNIT_CASE(crc64_be_benchmark), + {}, +}; + +static struct kunit_suite crc_test_suite = { + .name = "crc", + .test_cases = crc_test_cases, + .suite_init = crc_suite_init, + .suite_exit = crc_suite_exit, +}; +kunit_test_suite(crc_test_suite); + +MODULE_DESCRIPTION("Unit tests and benchmarks for the CRC library functions"); +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 969baab8c805..01fac1cd05a1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -58,3 +58,5 @@ libcurve25519-y += curve25519-selftest.o endif obj-$(CONFIG_MPILIB) += mpi/ + +obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c index 827fe89922ff..eafe14d021f5 100644 --- a/lib/crypto/aes.c +++ b/lib/crypto/aes.c @@ -6,7 +6,7 @@ #include <crypto/aes.h> #include <linux/crypto.h> #include <linux/module.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* * Emit the sbox as volatile const to prevent the compiler from doing diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c index 6bba6473fdf3..902e49410aaf 100644 --- a/lib/crypto/aesgcm.c +++ b/lib/crypto/aesgcm.c @@ -697,7 +697,7 @@ static int __init libaesgcm_init(void) u8 tagbuf[AES_BLOCK_SIZE]; int plen = aesgcm_tv[i].plen; struct aesgcm_ctx ctx; - u8 buf[sizeof(ptext12)]; + static u8 buf[sizeof(ptext12)]; if (aesgcm_expandkey(&ctx, aesgcm_tv[i].key, aesgcm_tv[i].klen, aesgcm_tv[i].clen - plen)) { diff --git a/lib/crypto/arc4.c b/lib/crypto/arc4.c index c2020f19c652..838812d18216 100644 --- a/lib/crypto/arc4.c +++ b/lib/crypto/arc4.c @@ -71,4 +71,5 @@ void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len) } EXPORT_SYMBOL(arc4_crypt); +MODULE_DESCRIPTION("ARC4 Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 3b6dcfdd9628..09682136b57c 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -14,7 +14,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/bug.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> static const u8 blake2s_sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c index b748fd3d256e..3cdda3b5ee06 100644 --- a/lib/crypto/chacha.c +++ b/lib/crypto/chacha.c @@ -10,7 +10,7 @@ #include <linux/export.h> #include <linux/bitops.h> #include <linux/string.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/chacha.h> static void chacha_permute(u32 *x, int nrounds) diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c index fa43deda2660..2ea61c28be4f 100644 --- a/lib/crypto/chacha20poly1305-selftest.c +++ b/lib/crypto/chacha20poly1305-selftest.c @@ -7,7 +7,7 @@ #include <crypto/chacha.h> #include <crypto/poly1305.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/bug.h> #include 
<linux/init.h> #include <linux/mm.h> diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index fa6a9440fc95..a839c0ac60b2 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -13,7 +13,7 @@ #include <crypto/poly1305.h> #include <crypto/scatterwalk.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/mm.h> diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c index 2fde0ec33dbd..2e0ba634e299 100644 --- a/lib/crypto/curve25519-fiat32.c +++ b/lib/crypto/curve25519-fiat32.c @@ -10,7 +10,7 @@ * with 128-bit integer types. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/curve25519.h> #include <linux/string.h> diff --git a/lib/crypto/curve25519-hacl64.c b/lib/crypto/curve25519-hacl64.c index c40e5d913234..c4204133afb7 100644 --- a/lib/crypto/curve25519-hacl64.c +++ b/lib/crypto/curve25519-hacl64.c @@ -10,7 +10,7 @@ * integer types. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/curve25519.h> #include <linux/string.h> diff --git a/lib/crypto/des.c b/lib/crypto/des.c index ef5bb8822aba..d3423b34a8e9 100644 --- a/lib/crypto/des.c +++ b/lib/crypto/des.c @@ -17,7 +17,7 @@ #include <linux/string.h> #include <linux/types.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/des.h> #include <crypto/internal/des.h> @@ -899,4 +899,5 @@ void des3_ede_decrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src) } EXPORT_SYMBOL_GPL(des3_ede_decrypt); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/gf128mul.c b/lib/crypto/gf128mul.c index 8f8c45e0cdcf..fbe72cb3453a 100644 --- a/lib/crypto/gf128mul.c +++ b/lib/crypto/gf128mul.c @@ -225,44 +225,6 @@ void gf128mul_lle(be128 *r, const be128 *b) } EXPORT_SYMBOL(gf128mul_lle); -void gf128mul_bbe(be128 *r, const be128 *b) -{ - be128 p[8]; - int i; - - p[0] = *r; - for (i = 0; i < 7; ++i) - gf128mul_x_bbe(&p[i + 1], &p[i]); - - memset(r, 0, sizeof(*r)); - for (i = 0;;) { - u8 ch = ((u8 *)b)[i]; - - if (ch & 0x80) - be128_xor(r, r, &p[7]); - if (ch & 0x40) - be128_xor(r, r, &p[6]); - if (ch & 0x20) - be128_xor(r, r, &p[5]); - if (ch & 0x10) - be128_xor(r, r, &p[4]); - if (ch & 0x08) - be128_xor(r, r, &p[3]); - if (ch & 0x04) - be128_xor(r, r, &p[2]); - if (ch & 0x02) - be128_xor(r, r, &p[1]); - if (ch & 0x01) - be128_xor(r, r, &p[0]); - - if (++i >= 16) - break; - - gf128mul_x8_bbe(r); - } -} -EXPORT_SYMBOL(gf128mul_bbe); - /* This version uses 64k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key value in GF(2^128). 
If we consider a GF(2^128) value in @@ -380,28 +342,6 @@ out: } EXPORT_SYMBOL(gf128mul_init_4k_lle); -struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) -{ - struct gf128mul_4k *t; - int j, k; - - t = kzalloc(sizeof(*t), GFP_KERNEL); - if (!t) - goto out; - - t->t[1] = *g; - for (j = 1; j <= 64; j <<= 1) - gf128mul_x_bbe(&t->t[j + j], &t->t[j]); - - for (j = 2; j < 256; j += j) - for (k = 1; k < j; ++k) - be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); - -out: - return t; -} -EXPORT_SYMBOL(gf128mul_init_4k_bbe); - void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t) { u8 *ap = (u8 *)a; @@ -417,20 +357,5 @@ void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t) } EXPORT_SYMBOL(gf128mul_4k_lle); -void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t) -{ - u8 *ap = (u8 *)a; - be128 r[1]; - int i = 0; - - *r = t->t[ap[0]]; - while (++i < 16) { - gf128mul_x8_bbe(r); - be128_xor(r, r, &t->t[ap[i]]); - } - *a = *r; -} -EXPORT_SYMBOL(gf128mul_4k_bbe); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c index dabc3accae05..cc1be0496eb9 100644 --- a/lib/crypto/libchacha.c +++ b/lib/crypto/libchacha.c @@ -32,4 +32,5 @@ void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_generic); +MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/memneq.c b/lib/crypto/memneq.c index 243d8677cc51..a2afd10349c9 100644 --- a/lib/crypto/memneq.c +++ b/lib/crypto/memneq.c @@ -59,7 +59,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/algapi.h> #include <linux/module.h> diff --git a/lib/crypto/mpi/Makefile b/lib/crypto/mpi/Makefile index 6e6ef9a34fe1..9ad84079025a 100644 --- a/lib/crypto/mpi/Makefile +++ b/lib/crypto/mpi/Makefile @@ -13,14 +13,12 @@ mpi-y = \ generic_mpih-rshift.o \ generic_mpih-sub1.o \ generic_mpih-add1.o \ - ec.o \ mpicoder.o \ mpi-add.o \ mpi-bit.o \ mpi-cmp.o \ mpi-sub-ui.o \ mpi-div.o \ - mpi-inv.o \ mpi-mod.o \ mpi-mul.o \ mpih-cmp.o \ diff --git a/lib/crypto/mpi/ec.c b/lib/crypto/mpi/ec.c deleted file mode 100644 index e16dca1e23d5..000000000000 --- a/lib/crypto/mpi/ec.c +++ /dev/null @@ -1,1509 +0,0 @@ -/* ec.c - Elliptic Curve functions - * Copyright (C) 2007 Free Software Foundation, Inc. - * Copyright (C) 2013 g10 Code GmbH - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "mpi-internal.h" -#include "longlong.h" - -#define point_init(a) mpi_point_init((a)) -#define point_free(a) mpi_point_free_parts((a)) - -#define log_error(fmt, ...) pr_err(fmt, ##__VA_ARGS__) -#define log_fatal(fmt, ...) pr_err(fmt, ##__VA_ARGS__) - -#define DIM(v) (sizeof(v)/sizeof((v)[0])) - - -/* Create a new point option. 
NBITS gives the size in bits of one - * coordinate; it is only used to pre-allocate some resources and - * might also be passed as 0 to use a default value. - */ -MPI_POINT mpi_point_new(unsigned int nbits) -{ - MPI_POINT p; - - (void)nbits; /* Currently not used. */ - - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (p) - mpi_point_init(p); - return p; -} -EXPORT_SYMBOL_GPL(mpi_point_new); - -/* Release the point object P. P may be NULL. */ -void mpi_point_release(MPI_POINT p) -{ - if (p) { - mpi_point_free_parts(p); - kfree(p); - } -} -EXPORT_SYMBOL_GPL(mpi_point_release); - -/* Initialize the fields of a point object. gcry_mpi_point_free_parts - * may be used to release the fields. - */ -void mpi_point_init(MPI_POINT p) -{ - p->x = mpi_new(0); - p->y = mpi_new(0); - p->z = mpi_new(0); -} -EXPORT_SYMBOL_GPL(mpi_point_init); - -/* Release the parts of a point object. */ -void mpi_point_free_parts(MPI_POINT p) -{ - mpi_free(p->x); p->x = NULL; - mpi_free(p->y); p->y = NULL; - mpi_free(p->z); p->z = NULL; -} -EXPORT_SYMBOL_GPL(mpi_point_free_parts); - -/* Set the value from S into D. */ -static void point_set(MPI_POINT d, MPI_POINT s) -{ - mpi_set(d->x, s->x); - mpi_set(d->y, s->y); - mpi_set(d->z, s->z); -} - -static void point_resize(MPI_POINT p, struct mpi_ec_ctx *ctx) -{ - size_t nlimbs = ctx->p->nlimbs; - - mpi_resize(p->x, nlimbs); - p->x->nlimbs = nlimbs; - mpi_resize(p->z, nlimbs); - p->z->nlimbs = nlimbs; - - if (ctx->model != MPI_EC_MONTGOMERY) { - mpi_resize(p->y, nlimbs); - p->y->nlimbs = nlimbs; - } -} - -static void point_swap_cond(MPI_POINT d, MPI_POINT s, unsigned long swap, - struct mpi_ec_ctx *ctx) -{ - mpi_swap_cond(d->x, s->x, swap); - if (ctx->model != MPI_EC_MONTGOMERY) - mpi_swap_cond(d->y, s->y, swap); - mpi_swap_cond(d->z, s->z, swap); -} - - -/* W = W mod P. */ -static void ec_mod(MPI w, struct mpi_ec_ctx *ec) -{ - if (ec->t.p_barrett) - mpi_mod_barrett(w, w, ec->t.p_barrett); - else - mpi_mod(w, w, ec->p); -} - -static void ec_addm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_add(w, u, v); - ec_mod(w, ctx); -} - -static void ec_subm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec) -{ - mpi_sub(w, u, v); - while (w->sign) - mpi_add(w, w, ec->p); - /*ec_mod(w, ec);*/ -} - -static void ec_mulm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_mul(w, u, v); - ec_mod(w, ctx); -} - -/* W = 2 * U mod P. */ -static void ec_mul2(MPI w, MPI u, struct mpi_ec_ctx *ctx) -{ - mpi_lshift(w, u, 1); - ec_mod(w, ctx); -} - -static void ec_powm(MPI w, const MPI b, const MPI e, - struct mpi_ec_ctx *ctx) -{ - mpi_powm(w, b, e, ctx->p); - /* mpi_abs(w); */ -} - -/* Shortcut for - * ec_powm(B, B, mpi_const(MPI_C_TWO), ctx); - * for easier optimization. - */ -static void ec_pow2(MPI w, const MPI b, struct mpi_ec_ctx *ctx) -{ - /* Using mpi_mul is slightly faster (at least on amd64). */ - /* mpi_powm(w, b, mpi_const(MPI_C_TWO), ctx->p); */ - ec_mulm(w, b, b, ctx); -} - -/* Shortcut for - * ec_powm(B, B, mpi_const(MPI_C_THREE), ctx); - * for easier optimization. 
- */ -static void ec_pow3(MPI w, const MPI b, struct mpi_ec_ctx *ctx) -{ - mpi_powm(w, b, mpi_const(MPI_C_THREE), ctx->p); -} - -static void ec_invm(MPI x, MPI a, struct mpi_ec_ctx *ctx) -{ - if (!mpi_invm(x, a, ctx->p)) - log_error("ec_invm: inverse does not exist:\n"); -} - -static void mpih_set_cond(mpi_ptr_t wp, mpi_ptr_t up, - mpi_size_t usize, unsigned long set) -{ - mpi_size_t i; - mpi_limb_t mask = ((mpi_limb_t)0) - set; - mpi_limb_t x; - - for (i = 0; i < usize; i++) { - x = mask & (wp[i] ^ up[i]); - wp[i] = wp[i] ^ x; - } -} - -/* Routines for 2^255 - 19. */ - -#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) - -static void ec_addm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_25519; - mpi_limb_t n[LIMB_SIZE_25519]; - mpi_limb_t borrow; - - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("addm_25519: different sizes\n"); - - memset(n, 0, sizeof(n)); - up = u->d; - vp = v->d; - wp = w->d; - - mpihelp_add_n(wp, up, vp, wsize); - borrow = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); - mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); - mpihelp_add_n(wp, wp, n, wsize); - wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); -} - -static void ec_subm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_25519; - mpi_limb_t n[LIMB_SIZE_25519]; - mpi_limb_t borrow; - - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("subm_25519: different sizes\n"); - - memset(n, 0, sizeof(n)); - up = u->d; - vp = v->d; - wp = w->d; - - borrow = mpihelp_sub_n(wp, up, vp, wsize); - mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); - mpihelp_add_n(wp, wp, n, wsize); - wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); -} - -static void ec_mulm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_25519; - mpi_limb_t n[LIMB_SIZE_25519*2]; - mpi_limb_t m[LIMB_SIZE_25519+1]; - mpi_limb_t cy; - int msb; - - (void)ctx; - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("mulm_25519: different sizes\n"); - - up = u->d; - vp = v->d; - wp = w->d; - - mpihelp_mul_n(n, up, vp, wsize); - memcpy(wp, n, wsize * BYTES_PER_MPI_LIMB); - wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); - - memcpy(m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB); - mpihelp_rshift(m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB)); - - memcpy(n, m, wsize * BYTES_PER_MPI_LIMB); - cy = mpihelp_lshift(m, m, LIMB_SIZE_25519, 4); - m[LIMB_SIZE_25519] = cy; - cy = mpihelp_add_n(m, m, n, wsize); - m[LIMB_SIZE_25519] += cy; - cy = mpihelp_add_n(m, m, n, wsize); - m[LIMB_SIZE_25519] += cy; - cy = mpihelp_add_n(m, m, n, wsize); - m[LIMB_SIZE_25519] += cy; - - cy = mpihelp_add_n(wp, wp, m, wsize); - m[LIMB_SIZE_25519] += cy; - - memset(m, 0, wsize * BYTES_PER_MPI_LIMB); - msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB)); - m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19; - wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); - mpihelp_add_n(wp, wp, m, wsize); - - m[0] = 0; - cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); - mpih_set_cond(m, ctx->p->d, wsize, (cy != 0UL)); - mpihelp_add_n(wp, wp, m, wsize); -} - -static void ec_mul2_25519(MPI w, MPI u, struct mpi_ec_ctx *ctx) -{ - ec_addm_25519(w, u, u, ctx); -} - -static void ec_pow2_25519(MPI w, const MPI b, struct mpi_ec_ctx *ctx) 
-{ - ec_mulm_25519(w, b, b, ctx); -} - -/* Routines for 2^448 - 2^224 - 1. */ - -#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) -#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2) - -static void ec_addm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_448; - mpi_limb_t n[LIMB_SIZE_448]; - mpi_limb_t cy; - - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("addm_448: different sizes\n"); - - memset(n, 0, sizeof(n)); - up = u->d; - vp = v->d; - wp = w->d; - - cy = mpihelp_add_n(wp, up, vp, wsize); - mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); - mpihelp_sub_n(wp, wp, n, wsize); -} - -static void ec_subm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_448; - mpi_limb_t n[LIMB_SIZE_448]; - mpi_limb_t borrow; - - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("subm_448: different sizes\n"); - - memset(n, 0, sizeof(n)); - up = u->d; - vp = v->d; - wp = w->d; - - borrow = mpihelp_sub_n(wp, up, vp, wsize); - mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); - mpihelp_add_n(wp, wp, n, wsize); -} - -static void ec_mulm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) -{ - mpi_ptr_t wp, up, vp; - mpi_size_t wsize = LIMB_SIZE_448; - mpi_limb_t n[LIMB_SIZE_448*2]; - mpi_limb_t a2[LIMB_SIZE_HALF_448]; - mpi_limb_t a3[LIMB_SIZE_HALF_448]; - mpi_limb_t b0[LIMB_SIZE_HALF_448]; - mpi_limb_t b1[LIMB_SIZE_HALF_448]; - mpi_limb_t cy; - int i; -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - mpi_limb_t b1_rest, a3_rest; -#endif - - if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) - log_bug("mulm_448: different sizes\n"); - - up = u->d; - vp = v->d; - wp = w->d; - - mpihelp_mul_n(n, up, vp, wsize); - - for (i = 0; i < (wsize + 1) / 2; i++) { - b0[i] = n[i]; - b1[i] = n[i+wsize/2]; - a2[i] = n[i+wsize]; - a3[i] = n[i+wsize+wsize/2]; - } - -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; - a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; - - b1_rest = 0; - a3_rest = 0; - - for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { - mpi_limb_t b1v, a3v; - b1v = b1[i]; - a3v = a3[i]; - b1[i] = (b1_rest << 32) | (b1v >> 32); - a3[i] = (a3_rest << 32) | (a3v >> 32); - b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); - a3_rest = a3v & (((mpi_limb_t)1UL << 32)-1); - } -#endif - - cy = mpihelp_add_n(b0, b0, a2, LIMB_SIZE_HALF_448); - cy += mpihelp_add_n(b0, b0, a3, LIMB_SIZE_HALF_448); - for (i = 0; i < (wsize + 1) / 2; i++) - wp[i] = b0[i]; -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL << 32)-1); -#endif - -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - cy = b0[LIMB_SIZE_HALF_448-1] >> 32; -#endif - - cy = mpihelp_add_1(b1, b1, LIMB_SIZE_HALF_448, cy); - cy += mpihelp_add_n(b1, b1, a2, LIMB_SIZE_HALF_448); - cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); - cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - b1_rest = 0; - for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { - mpi_limb_t b1v = b1[i]; - b1[i] = (b1_rest << 32) | (b1v >> 32); - b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); - } - wp[LIMB_SIZE_HALF_448-1] |= (b1_rest << 32); -#endif - for (i = 0; i < wsize / 2; i++) - wp[i+(wsize + 1) / 2] = b1[i]; - -#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) - cy = b1[LIMB_SIZE_HALF_448-1]; -#endif - - memset(n, 0, wsize * BYTES_PER_MPI_LIMB); - -#if (LIMB_SIZE_HALF_448 > 
LIMB_SIZE_448/2) - n[LIMB_SIZE_HALF_448-1] = cy << 32; -#else - n[LIMB_SIZE_HALF_448] = cy; -#endif - n[0] = cy; - mpihelp_add_n(wp, wp, n, wsize); - - memset(n, 0, wsize * BYTES_PER_MPI_LIMB); - cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); - mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); - mpihelp_add_n(wp, wp, n, wsize); -} - -static void ec_mul2_448(MPI w, MPI u, struct mpi_ec_ctx *ctx) -{ - ec_addm_448(w, u, u, ctx); -} - -static void ec_pow2_448(MPI w, const MPI b, struct mpi_ec_ctx *ctx) -{ - ec_mulm_448(w, b, b, ctx); -} - -struct field_table { - const char *p; - - /* computation routines for the field. */ - void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); - void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); -}; - -static const struct field_table field_table[] = { - { - "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", - ec_addm_25519, - ec_subm_25519, - ec_mulm_25519, - ec_mul2_25519, - ec_pow2_25519 - }, - { - "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", - ec_addm_448, - ec_subm_448, - ec_mulm_448, - ec_mul2_448, - ec_pow2_448 - }, - { NULL, NULL, NULL, NULL, NULL, NULL }, -}; - -/* Force recomputation of all helper variables. */ -static void mpi_ec_get_reset(struct mpi_ec_ctx *ec) -{ - ec->t.valid.a_is_pminus3 = 0; - ec->t.valid.two_inv_p = 0; -} - -/* Accessor for helper variable. */ -static int ec_get_a_is_pminus3(struct mpi_ec_ctx *ec) -{ - MPI tmp; - - if (!ec->t.valid.a_is_pminus3) { - ec->t.valid.a_is_pminus3 = 1; - tmp = mpi_alloc_like(ec->p); - mpi_sub_ui(tmp, ec->p, 3); - ec->t.a_is_pminus3 = !mpi_cmp(ec->a, tmp); - mpi_free(tmp); - } - - return ec->t.a_is_pminus3; -} - -/* Accessor for helper variable. 
*/ -static MPI ec_get_two_inv_p(struct mpi_ec_ctx *ec) -{ - if (!ec->t.valid.two_inv_p) { - ec->t.valid.two_inv_p = 1; - if (!ec->t.two_inv_p) - ec->t.two_inv_p = mpi_alloc(0); - ec_invm(ec->t.two_inv_p, mpi_const(MPI_C_TWO), ec); - } - return ec->t.two_inv_p; -} - -static const char *const curve25519_bad_points[] = { - "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", - "0x0000000000000000000000000000000000000000000000000000000000000000", - "0x0000000000000000000000000000000000000000000000000000000000000001", - "0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0", - "0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f", - "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec", - "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee", - NULL -}; - -static const char *const curve448_bad_points[] = { - "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" - "ffffffffffffffffffffffffffffffffffffffffffffffffffffffff", - "0x00000000000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000000000", - "0x00000000000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000000000000000000000000001", - "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" - "fffffffffffffffffffffffffffffffffffffffffffffffffffffffe", - "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff" - "00000000000000000000000000000000000000000000000000000000", - NULL -}; - -static const char *const *bad_points_table[] = { - curve25519_bad_points, - curve448_bad_points, -}; - -static void mpi_ec_coefficient_normalize(MPI a, MPI p) -{ - if (a->sign) { - mpi_resize(a, p->nlimbs); - mpihelp_sub_n(a->d, p->d, a->d, p->nlimbs); - a->nlimbs = p->nlimbs; - a->sign = 0; - } -} - -/* This function initialized a context for elliptic curve based on the - * field GF(p). P is the prime specifying this field, A is the first - * coefficient. CTX is expected to be zeroized. - */ -void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, - enum ecc_dialects dialect, - int flags, MPI p, MPI a, MPI b) -{ - int i; - static int use_barrett = -1 /* TODO: 1 or -1 */; - - mpi_ec_coefficient_normalize(a, p); - mpi_ec_coefficient_normalize(b, p); - - /* Fixme: Do we want to check some constraints? e.g. a < p */ - - ctx->model = model; - ctx->dialect = dialect; - ctx->flags = flags; - if (dialect == ECC_DIALECT_ED25519) - ctx->nbits = 256; - else - ctx->nbits = mpi_get_nbits(p); - ctx->p = mpi_copy(p); - ctx->a = mpi_copy(a); - ctx->b = mpi_copy(b); - - ctx->d = NULL; - ctx->t.two_inv_p = NULL; - - ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; - - mpi_ec_get_reset(ctx); - - if (model == MPI_EC_MONTGOMERY) { - for (i = 0; i < DIM(bad_points_table); i++) { - MPI p_candidate = mpi_scanval(bad_points_table[i][0]); - int match_p = !mpi_cmp(ctx->p, p_candidate); - int j; - - mpi_free(p_candidate); - if (!match_p) - continue; - - for (j = 0; i < DIM(ctx->t.scratch) && bad_points_table[i][j]; j++) - ctx->t.scratch[j] = mpi_scanval(bad_points_table[i][j]); - } - } else { - /* Allocate scratch variables. 
*/ - for (i = 0; i < DIM(ctx->t.scratch); i++) - ctx->t.scratch[i] = mpi_alloc_like(ctx->p); - } - - ctx->addm = ec_addm; - ctx->subm = ec_subm; - ctx->mulm = ec_mulm; - ctx->mul2 = ec_mul2; - ctx->pow2 = ec_pow2; - - for (i = 0; field_table[i].p; i++) { - MPI f_p; - - f_p = mpi_scanval(field_table[i].p); - if (!f_p) - break; - - if (!mpi_cmp(p, f_p)) { - ctx->addm = field_table[i].addm; - ctx->subm = field_table[i].subm; - ctx->mulm = field_table[i].mulm; - ctx->mul2 = field_table[i].mul2; - ctx->pow2 = field_table[i].pow2; - mpi_free(f_p); - - mpi_resize(ctx->a, ctx->p->nlimbs); - ctx->a->nlimbs = ctx->p->nlimbs; - - mpi_resize(ctx->b, ctx->p->nlimbs); - ctx->b->nlimbs = ctx->p->nlimbs; - - for (i = 0; i < DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++) - ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs; - - break; - } - - mpi_free(f_p); - } -} -EXPORT_SYMBOL_GPL(mpi_ec_init); - -void mpi_ec_deinit(struct mpi_ec_ctx *ctx) -{ - int i; - - mpi_barrett_free(ctx->t.p_barrett); - - /* Domain parameter. */ - mpi_free(ctx->p); - mpi_free(ctx->a); - mpi_free(ctx->b); - mpi_point_release(ctx->G); - mpi_free(ctx->n); - - /* The key. */ - mpi_point_release(ctx->Q); - mpi_free(ctx->d); - - /* Private data of ec.c. */ - mpi_free(ctx->t.two_inv_p); - - for (i = 0; i < DIM(ctx->t.scratch); i++) - mpi_free(ctx->t.scratch[i]); -} -EXPORT_SYMBOL_GPL(mpi_ec_deinit); - -/* Compute the affine coordinates from the projective coordinates in - * POINT. Set them into X and Y. If one coordinate is not required, - * X or Y may be passed as NULL. CTX is the usual context. Returns: 0 - * on success or !0 if POINT is at infinity. - */ -int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx) -{ - if (!mpi_cmp_ui(point->z, 0)) - return -1; - - switch (ctx->model) { - case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates. */ - { - MPI z1, z2, z3; - - z1 = mpi_new(0); - z2 = mpi_new(0); - ec_invm(z1, point->z, ctx); /* z1 = z^(-1) mod p */ - ec_mulm(z2, z1, z1, ctx); /* z2 = z^(-2) mod p */ - - if (x) - ec_mulm(x, point->x, z2, ctx); - - if (y) { - z3 = mpi_new(0); - ec_mulm(z3, z2, z1, ctx); /* z3 = z^(-3) mod p */ - ec_mulm(y, point->y, z3, ctx); - mpi_free(z3); - } - - mpi_free(z2); - mpi_free(z1); - } - return 0; - - case MPI_EC_MONTGOMERY: - { - if (x) - mpi_set(x, point->x); - - if (y) { - log_fatal("%s: Getting Y-coordinate on %s is not supported\n", - "mpi_ec_get_affine", "Montgomery"); - return -1; - } - } - return 0; - - case MPI_EC_EDWARDS: - { - MPI z; - - z = mpi_new(0); - ec_invm(z, point->z, ctx); - - mpi_resize(z, ctx->p->nlimbs); - z->nlimbs = ctx->p->nlimbs; - - if (x) { - mpi_resize(x, ctx->p->nlimbs); - x->nlimbs = ctx->p->nlimbs; - ctx->mulm(x, point->x, z, ctx); - } - if (y) { - mpi_resize(y, ctx->p->nlimbs); - y->nlimbs = ctx->p->nlimbs; - ctx->mulm(y, point->y, z, ctx); - } - - mpi_free(z); - } - return 0; - - default: - return -1; - } -} -EXPORT_SYMBOL_GPL(mpi_ec_get_affine); - -/* RESULT = 2 * POINT (Weierstrass version). 
*/ -static void dup_point_weierstrass(MPI_POINT result, - MPI_POINT point, struct mpi_ec_ctx *ctx) -{ -#define x3 (result->x) -#define y3 (result->y) -#define z3 (result->z) -#define t1 (ctx->t.scratch[0]) -#define t2 (ctx->t.scratch[1]) -#define t3 (ctx->t.scratch[2]) -#define l1 (ctx->t.scratch[3]) -#define l2 (ctx->t.scratch[4]) -#define l3 (ctx->t.scratch[5]) - - if (!mpi_cmp_ui(point->y, 0) || !mpi_cmp_ui(point->z, 0)) { - /* P_y == 0 || P_z == 0 => [1:1:0] */ - mpi_set_ui(x3, 1); - mpi_set_ui(y3, 1); - mpi_set_ui(z3, 0); - } else { - if (ec_get_a_is_pminus3(ctx)) { - /* Use the faster case. */ - /* L1 = 3(X - Z^2)(X + Z^2) */ - /* T1: used for Z^2. */ - /* T2: used for the right term. */ - ec_pow2(t1, point->z, ctx); - ec_subm(l1, point->x, t1, ctx); - ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); - ec_addm(t2, point->x, t1, ctx); - ec_mulm(l1, l1, t2, ctx); - } else { - /* Standard case. */ - /* L1 = 3X^2 + aZ^4 */ - /* T1: used for aZ^4. */ - ec_pow2(l1, point->x, ctx); - ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); - ec_powm(t1, point->z, mpi_const(MPI_C_FOUR), ctx); - ec_mulm(t1, t1, ctx->a, ctx); - ec_addm(l1, l1, t1, ctx); - } - /* Z3 = 2YZ */ - ec_mulm(z3, point->y, point->z, ctx); - ec_mul2(z3, z3, ctx); - - /* L2 = 4XY^2 */ - /* T2: used for Y2; required later. */ - ec_pow2(t2, point->y, ctx); - ec_mulm(l2, t2, point->x, ctx); - ec_mulm(l2, l2, mpi_const(MPI_C_FOUR), ctx); - - /* X3 = L1^2 - 2L2 */ - /* T1: used for L2^2. */ - ec_pow2(x3, l1, ctx); - ec_mul2(t1, l2, ctx); - ec_subm(x3, x3, t1, ctx); - - /* L3 = 8Y^4 */ - /* T2: taken from above. */ - ec_pow2(t2, t2, ctx); - ec_mulm(l3, t2, mpi_const(MPI_C_EIGHT), ctx); - - /* Y3 = L1(L2 - X3) - L3 */ - ec_subm(y3, l2, x3, ctx); - ec_mulm(y3, y3, l1, ctx); - ec_subm(y3, y3, l3, ctx); - } - -#undef x3 -#undef y3 -#undef z3 -#undef t1 -#undef t2 -#undef t3 -#undef l1 -#undef l2 -#undef l3 -} - -/* RESULT = 2 * POINT (Montgomery version). */ -static void dup_point_montgomery(MPI_POINT result, - MPI_POINT point, struct mpi_ec_ctx *ctx) -{ - (void)result; - (void)point; - (void)ctx; - log_fatal("%s: %s not yet supported\n", - "mpi_ec_dup_point", "Montgomery"); -} - -/* RESULT = 2 * POINT (Twisted Edwards version). 
*/ -static void dup_point_edwards(MPI_POINT result, - MPI_POINT point, struct mpi_ec_ctx *ctx) -{ -#define X1 (point->x) -#define Y1 (point->y) -#define Z1 (point->z) -#define X3 (result->x) -#define Y3 (result->y) -#define Z3 (result->z) -#define B (ctx->t.scratch[0]) -#define C (ctx->t.scratch[1]) -#define D (ctx->t.scratch[2]) -#define E (ctx->t.scratch[3]) -#define F (ctx->t.scratch[4]) -#define H (ctx->t.scratch[5]) -#define J (ctx->t.scratch[6]) - - /* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */ - - /* B = (X_1 + Y_1)^2 */ - ctx->addm(B, X1, Y1, ctx); - ctx->pow2(B, B, ctx); - - /* C = X_1^2 */ - /* D = Y_1^2 */ - ctx->pow2(C, X1, ctx); - ctx->pow2(D, Y1, ctx); - - /* E = aC */ - if (ctx->dialect == ECC_DIALECT_ED25519) - ctx->subm(E, ctx->p, C, ctx); - else - ctx->mulm(E, ctx->a, C, ctx); - - /* F = E + D */ - ctx->addm(F, E, D, ctx); - - /* H = Z_1^2 */ - ctx->pow2(H, Z1, ctx); - - /* J = F - 2H */ - ctx->mul2(J, H, ctx); - ctx->subm(J, F, J, ctx); - - /* X_3 = (B - C - D) · J */ - ctx->subm(X3, B, C, ctx); - ctx->subm(X3, X3, D, ctx); - ctx->mulm(X3, X3, J, ctx); - - /* Y_3 = F · (E - D) */ - ctx->subm(Y3, E, D, ctx); - ctx->mulm(Y3, Y3, F, ctx); - - /* Z_3 = F · J */ - ctx->mulm(Z3, F, J, ctx); - -#undef X1 -#undef Y1 -#undef Z1 -#undef X3 -#undef Y3 -#undef Z3 -#undef B -#undef C -#undef D -#undef E -#undef F -#undef H -#undef J -} - -/* RESULT = 2 * POINT */ -static void -mpi_ec_dup_point(MPI_POINT result, MPI_POINT point, struct mpi_ec_ctx *ctx) -{ - switch (ctx->model) { - case MPI_EC_WEIERSTRASS: - dup_point_weierstrass(result, point, ctx); - break; - case MPI_EC_MONTGOMERY: - dup_point_montgomery(result, point, ctx); - break; - case MPI_EC_EDWARDS: - dup_point_edwards(result, point, ctx); - break; - } -} - -/* RESULT = P1 + P2 (Weierstrass version).*/ -static void add_points_weierstrass(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx) -{ -#define x1 (p1->x) -#define y1 (p1->y) -#define z1 (p1->z) -#define x2 (p2->x) -#define y2 (p2->y) -#define z2 (p2->z) -#define x3 (result->x) -#define y3 (result->y) -#define z3 (result->z) -#define l1 (ctx->t.scratch[0]) -#define l2 (ctx->t.scratch[1]) -#define l3 (ctx->t.scratch[2]) -#define l4 (ctx->t.scratch[3]) -#define l5 (ctx->t.scratch[4]) -#define l6 (ctx->t.scratch[5]) -#define l7 (ctx->t.scratch[6]) -#define l8 (ctx->t.scratch[7]) -#define l9 (ctx->t.scratch[8]) -#define t1 (ctx->t.scratch[9]) -#define t2 (ctx->t.scratch[10]) - - if ((!mpi_cmp(x1, x2)) && (!mpi_cmp(y1, y2)) && (!mpi_cmp(z1, z2))) { - /* Same point; need to call the duplicate function. */ - mpi_ec_dup_point(result, p1, ctx); - } else if (!mpi_cmp_ui(z1, 0)) { - /* P1 is at infinity. */ - mpi_set(x3, p2->x); - mpi_set(y3, p2->y); - mpi_set(z3, p2->z); - } else if (!mpi_cmp_ui(z2, 0)) { - /* P2 is at infinity. 
*/ - mpi_set(x3, p1->x); - mpi_set(y3, p1->y); - mpi_set(z3, p1->z); - } else { - int z1_is_one = !mpi_cmp_ui(z1, 1); - int z2_is_one = !mpi_cmp_ui(z2, 1); - - /* l1 = x1 z2^2 */ - /* l2 = x2 z1^2 */ - if (z2_is_one) - mpi_set(l1, x1); - else { - ec_pow2(l1, z2, ctx); - ec_mulm(l1, l1, x1, ctx); - } - if (z1_is_one) - mpi_set(l2, x2); - else { - ec_pow2(l2, z1, ctx); - ec_mulm(l2, l2, x2, ctx); - } - /* l3 = l1 - l2 */ - ec_subm(l3, l1, l2, ctx); - /* l4 = y1 z2^3 */ - ec_powm(l4, z2, mpi_const(MPI_C_THREE), ctx); - ec_mulm(l4, l4, y1, ctx); - /* l5 = y2 z1^3 */ - ec_powm(l5, z1, mpi_const(MPI_C_THREE), ctx); - ec_mulm(l5, l5, y2, ctx); - /* l6 = l4 - l5 */ - ec_subm(l6, l4, l5, ctx); - - if (!mpi_cmp_ui(l3, 0)) { - if (!mpi_cmp_ui(l6, 0)) { - /* P1 and P2 are the same - use duplicate function. */ - mpi_ec_dup_point(result, p1, ctx); - } else { - /* P1 is the inverse of P2. */ - mpi_set_ui(x3, 1); - mpi_set_ui(y3, 1); - mpi_set_ui(z3, 0); - } - } else { - /* l7 = l1 + l2 */ - ec_addm(l7, l1, l2, ctx); - /* l8 = l4 + l5 */ - ec_addm(l8, l4, l5, ctx); - /* z3 = z1 z2 l3 */ - ec_mulm(z3, z1, z2, ctx); - ec_mulm(z3, z3, l3, ctx); - /* x3 = l6^2 - l7 l3^2 */ - ec_pow2(t1, l6, ctx); - ec_pow2(t2, l3, ctx); - ec_mulm(t2, t2, l7, ctx); - ec_subm(x3, t1, t2, ctx); - /* l9 = l7 l3^2 - 2 x3 */ - ec_mul2(t1, x3, ctx); - ec_subm(l9, t2, t1, ctx); - /* y3 = (l9 l6 - l8 l3^3)/2 */ - ec_mulm(l9, l9, l6, ctx); - ec_powm(t1, l3, mpi_const(MPI_C_THREE), ctx); /* fixme: Use saved value*/ - ec_mulm(t1, t1, l8, ctx); - ec_subm(y3, l9, t1, ctx); - ec_mulm(y3, y3, ec_get_two_inv_p(ctx), ctx); - } - } - -#undef x1 -#undef y1 -#undef z1 -#undef x2 -#undef y2 -#undef z2 -#undef x3 -#undef y3 -#undef z3 -#undef l1 -#undef l2 -#undef l3 -#undef l4 -#undef l5 -#undef l6 -#undef l7 -#undef l8 -#undef l9 -#undef t1 -#undef t2 -} - -/* RESULT = P1 + P2 (Montgomery version).*/ -static void add_points_montgomery(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx) -{ - (void)result; - (void)p1; - (void)p2; - (void)ctx; - log_fatal("%s: %s not yet supported\n", - "mpi_ec_add_points", "Montgomery"); -} - -/* RESULT = P1 + P2 (Twisted Edwards version).*/ -static void add_points_edwards(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx) -{ -#define X1 (p1->x) -#define Y1 (p1->y) -#define Z1 (p1->z) -#define X2 (p2->x) -#define Y2 (p2->y) -#define Z2 (p2->z) -#define X3 (result->x) -#define Y3 (result->y) -#define Z3 (result->z) -#define A (ctx->t.scratch[0]) -#define B (ctx->t.scratch[1]) -#define C (ctx->t.scratch[2]) -#define D (ctx->t.scratch[3]) -#define E (ctx->t.scratch[4]) -#define F (ctx->t.scratch[5]) -#define G (ctx->t.scratch[6]) -#define tmp (ctx->t.scratch[7]) - - point_resize(result, ctx); - - /* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_3) */ - - /* A = Z1 · Z2 */ - ctx->mulm(A, Z1, Z2, ctx); - - /* B = A^2 */ - ctx->pow2(B, A, ctx); - - /* C = X1 · X2 */ - ctx->mulm(C, X1, X2, ctx); - - /* D = Y1 · Y2 */ - ctx->mulm(D, Y1, Y2, ctx); - - /* E = d · C · D */ - ctx->mulm(E, ctx->b, C, ctx); - ctx->mulm(E, E, D, ctx); - - /* F = B - E */ - ctx->subm(F, B, E, ctx); - - /* G = B + E */ - ctx->addm(G, B, E, ctx); - - /* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */ - ctx->addm(tmp, X1, Y1, ctx); - ctx->addm(X3, X2, Y2, ctx); - ctx->mulm(X3, X3, tmp, ctx); - ctx->subm(X3, X3, C, ctx); - ctx->subm(X3, X3, D, ctx); - ctx->mulm(X3, X3, F, ctx); - ctx->mulm(X3, X3, A, ctx); - - /* Y_3 = A · G · (D - aC) */ - if (ctx->dialect == ECC_DIALECT_ED25519) { - 
ctx->addm(Y3, D, C, ctx); - } else { - ctx->mulm(Y3, ctx->a, C, ctx); - ctx->subm(Y3, D, Y3, ctx); - } - ctx->mulm(Y3, Y3, G, ctx); - ctx->mulm(Y3, Y3, A, ctx); - - /* Z_3 = F · G */ - ctx->mulm(Z3, F, G, ctx); - - -#undef X1 -#undef Y1 -#undef Z1 -#undef X2 -#undef Y2 -#undef Z2 -#undef X3 -#undef Y3 -#undef Z3 -#undef A -#undef B -#undef C -#undef D -#undef E -#undef F -#undef G -#undef tmp -} - -/* Compute a step of Montgomery Ladder (only use X and Z in the point). - * Inputs: P1, P2, and x-coordinate of DIF = P1 - P1. - * Outputs: PRD = 2 * P1 and SUM = P1 + P2. - */ -static void montgomery_ladder(MPI_POINT prd, MPI_POINT sum, - MPI_POINT p1, MPI_POINT p2, MPI dif_x, - struct mpi_ec_ctx *ctx) -{ - ctx->addm(sum->x, p2->x, p2->z, ctx); - ctx->subm(p2->z, p2->x, p2->z, ctx); - ctx->addm(prd->x, p1->x, p1->z, ctx); - ctx->subm(p1->z, p1->x, p1->z, ctx); - ctx->mulm(p2->x, p1->z, sum->x, ctx); - ctx->mulm(p2->z, prd->x, p2->z, ctx); - ctx->pow2(p1->x, prd->x, ctx); - ctx->pow2(p1->z, p1->z, ctx); - ctx->addm(sum->x, p2->x, p2->z, ctx); - ctx->subm(p2->z, p2->x, p2->z, ctx); - ctx->mulm(prd->x, p1->x, p1->z, ctx); - ctx->subm(p1->z, p1->x, p1->z, ctx); - ctx->pow2(sum->x, sum->x, ctx); - ctx->pow2(sum->z, p2->z, ctx); - ctx->mulm(prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */ - ctx->mulm(sum->z, sum->z, dif_x, ctx); - ctx->addm(prd->z, p1->x, prd->z, ctx); - ctx->mulm(prd->z, prd->z, p1->z, ctx); -} - -/* RESULT = P1 + P2 */ -void mpi_ec_add_points(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx) -{ - switch (ctx->model) { - case MPI_EC_WEIERSTRASS: - add_points_weierstrass(result, p1, p2, ctx); - break; - case MPI_EC_MONTGOMERY: - add_points_montgomery(result, p1, p2, ctx); - break; - case MPI_EC_EDWARDS: - add_points_edwards(result, p1, p2, ctx); - break; - } -} -EXPORT_SYMBOL_GPL(mpi_ec_add_points); - -/* Scalar point multiplication - the main function for ECC. If takes - * an integer SCALAR and a POINT as well as the usual context CTX. - * RESULT will be set to the resulting point. - */ -void mpi_ec_mul_point(MPI_POINT result, - MPI scalar, MPI_POINT point, - struct mpi_ec_ctx *ctx) -{ - MPI x1, y1, z1, k, h, yy; - unsigned int i, loops; - struct gcry_mpi_point p1, p2, p1inv; - - if (ctx->model == MPI_EC_EDWARDS) { - /* Simple left to right binary method. Algorithm 3.27 from - * {author={Hankerson, Darrel and Menezes, Alfred J. and Vanstone, Scott}, - * title = {Guide to Elliptic Curve Cryptography}, - * year = {2003}, isbn = {038795273X}, - * url = {http://www.cacr.math.uwaterloo.ca/ecc/}, - * publisher = {Springer-Verlag New York, Inc.}} - */ - unsigned int nbits; - int j; - - if (mpi_cmp(scalar, ctx->p) >= 0) - nbits = mpi_get_nbits(scalar); - else - nbits = mpi_get_nbits(ctx->p); - - mpi_set_ui(result->x, 0); - mpi_set_ui(result->y, 1); - mpi_set_ui(result->z, 1); - point_resize(point, ctx); - - point_resize(result, ctx); - point_resize(point, ctx); - - for (j = nbits-1; j >= 0; j--) { - mpi_ec_dup_point(result, result, ctx); - if (mpi_test_bit(scalar, j)) - mpi_ec_add_points(result, result, point, ctx); - } - return; - } else if (ctx->model == MPI_EC_MONTGOMERY) { - unsigned int nbits; - int j; - struct gcry_mpi_point p1_, p2_; - MPI_POINT q1, q2, prd, sum; - unsigned long sw; - mpi_size_t rsize; - - /* Compute scalar point multiplication with Montgomery Ladder. - * Note that we don't use Y-coordinate in the points at all. - * RESULT->Y will be filled by zero. 
- */ - - nbits = mpi_get_nbits(scalar); - point_init(&p1); - point_init(&p2); - point_init(&p1_); - point_init(&p2_); - mpi_set_ui(p1.x, 1); - mpi_free(p2.x); - p2.x = mpi_copy(point->x); - mpi_set_ui(p2.z, 1); - - point_resize(&p1, ctx); - point_resize(&p2, ctx); - point_resize(&p1_, ctx); - point_resize(&p2_, ctx); - - mpi_resize(point->x, ctx->p->nlimbs); - point->x->nlimbs = ctx->p->nlimbs; - - q1 = &p1; - q2 = &p2; - prd = &p1_; - sum = &p2_; - - for (j = nbits-1; j >= 0; j--) { - MPI_POINT t; - - sw = mpi_test_bit(scalar, j); - point_swap_cond(q1, q2, sw, ctx); - montgomery_ladder(prd, sum, q1, q2, point->x, ctx); - point_swap_cond(prd, sum, sw, ctx); - t = q1; q1 = prd; prd = t; - t = q2; q2 = sum; sum = t; - } - - mpi_clear(result->y); - sw = (nbits & 1); - point_swap_cond(&p1, &p1_, sw, ctx); - - rsize = p1.z->nlimbs; - MPN_NORMALIZE(p1.z->d, rsize); - if (rsize == 0) { - mpi_set_ui(result->x, 1); - mpi_set_ui(result->z, 0); - } else { - z1 = mpi_new(0); - ec_invm(z1, p1.z, ctx); - ec_mulm(result->x, p1.x, z1, ctx); - mpi_set_ui(result->z, 1); - mpi_free(z1); - } - - point_free(&p1); - point_free(&p2); - point_free(&p1_); - point_free(&p2_); - return; - } - - x1 = mpi_alloc_like(ctx->p); - y1 = mpi_alloc_like(ctx->p); - h = mpi_alloc_like(ctx->p); - k = mpi_copy(scalar); - yy = mpi_copy(point->y); - - if (mpi_has_sign(k)) { - k->sign = 0; - ec_invm(yy, yy, ctx); - } - - if (!mpi_cmp_ui(point->z, 1)) { - mpi_set(x1, point->x); - mpi_set(y1, yy); - } else { - MPI z2, z3; - - z2 = mpi_alloc_like(ctx->p); - z3 = mpi_alloc_like(ctx->p); - ec_mulm(z2, point->z, point->z, ctx); - ec_mulm(z3, point->z, z2, ctx); - ec_invm(z2, z2, ctx); - ec_mulm(x1, point->x, z2, ctx); - ec_invm(z3, z3, ctx); - ec_mulm(y1, yy, z3, ctx); - mpi_free(z2); - mpi_free(z3); - } - z1 = mpi_copy(mpi_const(MPI_C_ONE)); - - mpi_mul(h, k, mpi_const(MPI_C_THREE)); /* h = 3k */ - loops = mpi_get_nbits(h); - if (loops < 2) { - /* If SCALAR is zero, the above mpi_mul sets H to zero and thus - * LOOPs will be zero. To avoid an underflow of I in the main - * loop we set LOOP to 2 and the result to (0,0,0). - */ - loops = 2; - mpi_clear(result->x); - mpi_clear(result->y); - mpi_clear(result->z); - } else { - mpi_set(result->x, point->x); - mpi_set(result->y, yy); - mpi_set(result->z, point->z); - } - mpi_free(yy); yy = NULL; - - p1.x = x1; x1 = NULL; - p1.y = y1; y1 = NULL; - p1.z = z1; z1 = NULL; - point_init(&p2); - point_init(&p1inv); - - /* Invert point: y = p - y mod p */ - point_set(&p1inv, &p1); - ec_subm(p1inv.y, ctx->p, p1inv.y, ctx); - - for (i = loops-2; i > 0; i--) { - mpi_ec_dup_point(result, result, ctx); - if (mpi_test_bit(h, i) == 1 && mpi_test_bit(k, i) == 0) { - point_set(&p2, result); - mpi_ec_add_points(result, &p2, &p1, ctx); - } - if (mpi_test_bit(h, i) == 0 && mpi_test_bit(k, i) == 1) { - point_set(&p2, result); - mpi_ec_add_points(result, &p2, &p1inv, ctx); - } - } - - point_free(&p1); - point_free(&p2); - point_free(&p1inv); - mpi_free(h); - mpi_free(k); -} -EXPORT_SYMBOL_GPL(mpi_ec_mul_point); - -/* Return true if POINT is on the curve described by CTX. */ -int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx) -{ - int res = 0; - MPI x, y, w; - - x = mpi_new(0); - y = mpi_new(0); - w = mpi_new(0); - - /* Check that the point is in range. This needs to be done here and - * not after conversion to affine coordinates. 
- */ - if (mpi_cmpabs(point->x, ctx->p) >= 0) - goto leave; - if (mpi_cmpabs(point->y, ctx->p) >= 0) - goto leave; - if (mpi_cmpabs(point->z, ctx->p) >= 0) - goto leave; - - switch (ctx->model) { - case MPI_EC_WEIERSTRASS: - { - MPI xxx; - - if (mpi_ec_get_affine(x, y, point, ctx)) - goto leave; - - xxx = mpi_new(0); - - /* y^2 == x^3 + a·x + b */ - ec_pow2(y, y, ctx); - - ec_pow3(xxx, x, ctx); - ec_mulm(w, ctx->a, x, ctx); - ec_addm(w, w, ctx->b, ctx); - ec_addm(w, w, xxx, ctx); - - if (!mpi_cmp(y, w)) - res = 1; - - mpi_free(xxx); - } - break; - - case MPI_EC_MONTGOMERY: - { -#define xx y - /* With Montgomery curve, only X-coordinate is valid. */ - if (mpi_ec_get_affine(x, NULL, point, ctx)) - goto leave; - - /* The equation is: b * y^2 == x^3 + a · x^2 + x */ - /* We check if right hand is quadratic residue or not by - * Euler's criterion. - */ - /* CTX->A has (a-2)/4 and CTX->B has b^-1 */ - ec_mulm(w, ctx->a, mpi_const(MPI_C_FOUR), ctx); - ec_addm(w, w, mpi_const(MPI_C_TWO), ctx); - ec_mulm(w, w, x, ctx); - ec_pow2(xx, x, ctx); - ec_addm(w, w, xx, ctx); - ec_addm(w, w, mpi_const(MPI_C_ONE), ctx); - ec_mulm(w, w, x, ctx); - ec_mulm(w, w, ctx->b, ctx); -#undef xx - /* Compute Euler's criterion: w^(p-1)/2 */ -#define p_minus1 y - ec_subm(p_minus1, ctx->p, mpi_const(MPI_C_ONE), ctx); - mpi_rshift(p_minus1, p_minus1, 1); - ec_powm(w, w, p_minus1, ctx); - - res = !mpi_cmp_ui(w, 1); -#undef p_minus1 - } - break; - - case MPI_EC_EDWARDS: - { - if (mpi_ec_get_affine(x, y, point, ctx)) - goto leave; - - mpi_resize(w, ctx->p->nlimbs); - w->nlimbs = ctx->p->nlimbs; - - /* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */ - ctx->pow2(x, x, ctx); - ctx->pow2(y, y, ctx); - if (ctx->dialect == ECC_DIALECT_ED25519) - ctx->subm(w, ctx->p, x, ctx); - else - ctx->mulm(w, ctx->a, x, ctx); - ctx->addm(w, w, y, ctx); - ctx->mulm(x, x, y, ctx); - ctx->mulm(x, x, ctx->b, ctx); - ctx->subm(w, w, x, ctx); - if (!mpi_cmp_ui(w, 1)) - res = 1; - } - break; - } - -leave: - mpi_free(w); - mpi_free(x); - mpi_free(y); - - return res; -} -EXPORT_SYMBOL_GPL(mpi_ec_curve_point); diff --git a/lib/crypto/mpi/mpi-add.c b/lib/crypto/mpi/mpi-add.c index 9056fc5167fc..3015140d4860 100644 --- a/lib/crypto/mpi/mpi-add.c +++ b/lib/crypto/mpi/mpi-add.c @@ -13,62 +13,12 @@ #include "mpi-internal.h" -/**************** - * Add the unsigned integer V to the mpi-integer U and store the - * result in W. U and V may be the same. - */ -void mpi_add_ui(MPI w, MPI u, unsigned long v) -{ - mpi_ptr_t wp, up; - mpi_size_t usize, wsize; - int usign, wsign; - - usize = u->nlimbs; - usign = u->sign; - wsign = 0; - - /* If not space for W (and possible carry), increase space. */ - wsize = usize + 1; - if (w->alloced < wsize) - mpi_resize(w, wsize); - - /* These must be after realloc (U may be the same as W). */ - up = u->d; - wp = w->d; - - if (!usize) { /* simple */ - wp[0] = v; - wsize = v ? 1:0; - } else if (!usign) { /* mpi is not negative */ - mpi_limb_t cy; - cy = mpihelp_add_1(wp, up, usize, v); - wp[usize] = cy; - wsize = usize + cy; - } else { - /* The signs are different. Need exact comparison to determine - * which operand to subtract from which. - */ - if (usize == 1 && up[0] < v) { - wp[0] = v - up[0]; - wsize = 1; - } else { - mpihelp_sub_1(wp, up, usize, v); - /* Size can decrease with at most one limb. 
*/ - wsize = usize - (wp[usize-1] == 0); - wsign = 1; - } - } - - w->nlimbs = wsize; - w->sign = wsign; -} - - -void mpi_add(MPI w, MPI u, MPI v) +int mpi_add(MPI w, MPI u, MPI v) { mpi_ptr_t wp, up, vp; mpi_size_t usize, vsize, wsize; int usign, vsign, wsign; + int err; if (u->nlimbs < v->nlimbs) { /* Swap U and V. */ usize = v->nlimbs; @@ -76,7 +26,9 @@ void mpi_add(MPI w, MPI u, MPI v) vsize = u->nlimbs; vsign = u->sign; wsize = usize + 1; - RESIZE_IF_NEEDED(w, wsize); + err = RESIZE_IF_NEEDED(w, wsize); + if (err) + return err; /* These must be after realloc (u or v may be the same as w). */ up = v->d; vp = u->d; @@ -86,7 +38,9 @@ void mpi_add(MPI w, MPI u, MPI v) vsize = v->nlimbs; vsign = v->sign; wsize = usize + 1; - RESIZE_IF_NEEDED(w, wsize); + err = RESIZE_IF_NEEDED(w, wsize); + if (err) + return err; /* These must be after realloc (u or v may be the same as w). */ up = u->d; vp = v->d; @@ -128,28 +82,37 @@ void mpi_add(MPI w, MPI u, MPI v) w->nlimbs = wsize; w->sign = wsign; + return 0; } EXPORT_SYMBOL_GPL(mpi_add); -void mpi_sub(MPI w, MPI u, MPI v) +int mpi_sub(MPI w, MPI u, MPI v) { - MPI vv = mpi_copy(v); + int err; + MPI vv; + + vv = mpi_copy(v); + if (!vv) + return -ENOMEM; + vv->sign = !vv->sign; - mpi_add(w, u, vv); + err = mpi_add(w, u, vv); mpi_free(vv); + + return err; } EXPORT_SYMBOL_GPL(mpi_sub); -void mpi_addm(MPI w, MPI u, MPI v, MPI m) +int mpi_addm(MPI w, MPI u, MPI v, MPI m) { - mpi_add(w, u, v); - mpi_mod(w, w, m); + return mpi_add(w, u, v) ?: + mpi_mod(w, w, m); } EXPORT_SYMBOL_GPL(mpi_addm); -void mpi_subm(MPI w, MPI u, MPI v, MPI m) +int mpi_subm(MPI w, MPI u, MPI v, MPI m) { - mpi_sub(w, u, v); - mpi_mod(w, w, m); + return mpi_sub(w, u, v) ?: + mpi_mod(w, w, m); } EXPORT_SYMBOL_GPL(mpi_subm); diff --git a/lib/crypto/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c index 070ba784c9f1..934d81311360 100644 --- a/lib/crypto/mpi/mpi-bit.c +++ b/lib/crypto/mpi/mpi-bit.c @@ -32,7 +32,6 @@ void mpi_normalize(MPI a) for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--) ; } -EXPORT_SYMBOL_GPL(mpi_normalize); /**************** * Return the number of bits in A. @@ -77,9 +76,10 @@ EXPORT_SYMBOL_GPL(mpi_test_bit); /**************** * Set bit N of A. */ -void mpi_set_bit(MPI a, unsigned int n) +int mpi_set_bit(MPI a, unsigned int n) { unsigned int i, limbno, bitno; + int err; limbno = n / BITS_PER_MPI_LIMB; bitno = n % BITS_PER_MPI_LIMB; @@ -87,106 +87,32 @@ void mpi_set_bit(MPI a, unsigned int n) if (limbno >= a->nlimbs) { for (i = a->nlimbs; i < a->alloced; i++) a->d[i] = 0; - mpi_resize(a, limbno+1); + err = mpi_resize(a, limbno+1); + if (err) + return err; a->nlimbs = limbno+1; } a->d[limbno] |= (A_LIMB_1<<bitno); + return 0; } - -/**************** - * Set bit N of A. 
and clear all bits above - */ -void mpi_set_highbit(MPI a, unsigned int n) -{ - unsigned int i, limbno, bitno; - - limbno = n / BITS_PER_MPI_LIMB; - bitno = n % BITS_PER_MPI_LIMB; - - if (limbno >= a->nlimbs) { - for (i = a->nlimbs; i < a->alloced; i++) - a->d[i] = 0; - mpi_resize(a, limbno+1); - a->nlimbs = limbno+1; - } - a->d[limbno] |= (A_LIMB_1<<bitno); - for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++) - a->d[limbno] &= ~(A_LIMB_1 << bitno); - a->nlimbs = limbno+1; -} -EXPORT_SYMBOL_GPL(mpi_set_highbit); - -/**************** - * clear bit N of A and all bits above - */ -void mpi_clear_highbit(MPI a, unsigned int n) -{ - unsigned int limbno, bitno; - - limbno = n / BITS_PER_MPI_LIMB; - bitno = n % BITS_PER_MPI_LIMB; - - if (limbno >= a->nlimbs) - return; /* not allocated, therefore no need to clear bits :-) */ - - for ( ; bitno < BITS_PER_MPI_LIMB; bitno++) - a->d[limbno] &= ~(A_LIMB_1 << bitno); - a->nlimbs = limbno+1; -} - -/**************** - * Clear bit N of A. - */ -void mpi_clear_bit(MPI a, unsigned int n) -{ - unsigned int limbno, bitno; - - limbno = n / BITS_PER_MPI_LIMB; - bitno = n % BITS_PER_MPI_LIMB; - - if (limbno >= a->nlimbs) - return; /* Don't need to clear this bit, it's far too left. */ - a->d[limbno] &= ~(A_LIMB_1 << bitno); -} -EXPORT_SYMBOL_GPL(mpi_clear_bit); - - -/**************** - * Shift A by COUNT limbs to the right - * This is used only within the MPI library - */ -void mpi_rshift_limbs(MPI a, unsigned int count) -{ - mpi_ptr_t ap = a->d; - mpi_size_t n = a->nlimbs; - unsigned int i; - - if (count >= n) { - a->nlimbs = 0; - return; - } - - for (i = 0; i < n - count; i++) - ap[i] = ap[i+count]; - ap[i] = 0; - a->nlimbs -= count; -} +EXPORT_SYMBOL_GPL(mpi_set_bit); /* * Shift A by N bits to the right. */ -void mpi_rshift(MPI x, MPI a, unsigned int n) +int mpi_rshift(MPI x, MPI a, unsigned int n) { mpi_size_t xsize; unsigned int i; unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); unsigned int nbits = (n%BITS_PER_MPI_LIMB); + int err; if (x == a) { /* In-place operation. */ if (nlimbs >= x->nlimbs) { x->nlimbs = 0; - return; + return 0; } if (nlimbs) { @@ -201,7 +127,9 @@ void mpi_rshift(MPI x, MPI a, unsigned int n) /* Copy and shift by more or equal bits than in a limb. */ xsize = a->nlimbs; x->sign = a->sign; - RESIZE_IF_NEEDED(x, xsize); + err = RESIZE_IF_NEEDED(x, xsize); + if (err) + return err; x->nlimbs = xsize; for (i = 0; i < a->nlimbs; i++) x->d[i] = a->d[i]; @@ -209,15 +137,13 @@ void mpi_rshift(MPI x, MPI a, unsigned int n) if (nlimbs >= x->nlimbs) { x->nlimbs = 0; - return; + return 0; } - if (nlimbs) { - for (i = 0; i < x->nlimbs - nlimbs; i++) - x->d[i] = x->d[i+nlimbs]; - x->d[i] = 0; - x->nlimbs -= nlimbs; - } + for (i = 0; i < x->nlimbs - nlimbs; i++) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; if (x->nlimbs && nbits) mpihelp_rshift(x->d, x->d, x->nlimbs, nbits); @@ -225,7 +151,9 @@ void mpi_rshift(MPI x, MPI a, unsigned int n) /* Copy and shift by less than bits in a limb. 
*/ xsize = a->nlimbs; x->sign = a->sign; - RESIZE_IF_NEEDED(x, xsize); + err = RESIZE_IF_NEEDED(x, xsize); + if (err) + return err; x->nlimbs = xsize; if (xsize) { @@ -241,68 +169,7 @@ void mpi_rshift(MPI x, MPI a, unsigned int n) } } MPN_NORMALIZE(x->d, x->nlimbs); -} -EXPORT_SYMBOL_GPL(mpi_rshift); -/**************** - * Shift A by COUNT limbs to the left - * This is used only within the MPI library - */ -void mpi_lshift_limbs(MPI a, unsigned int count) -{ - mpi_ptr_t ap; - int n = a->nlimbs; - int i; - - if (!count || !n) - return; - - RESIZE_IF_NEEDED(a, n+count); - - ap = a->d; - for (i = n-1; i >= 0; i--) - ap[i+count] = ap[i]; - for (i = 0; i < count; i++) - ap[i] = 0; - a->nlimbs += count; -} - -/* - * Shift A by N bits to the left. - */ -void mpi_lshift(MPI x, MPI a, unsigned int n) -{ - unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); - unsigned int nbits = (n%BITS_PER_MPI_LIMB); - - if (x == a && !n) - return; /* In-place shift with an amount of zero. */ - - if (x != a) { - /* Copy A to X. */ - unsigned int alimbs = a->nlimbs; - int asign = a->sign; - mpi_ptr_t xp, ap; - - RESIZE_IF_NEEDED(x, alimbs+nlimbs+1); - xp = x->d; - ap = a->d; - MPN_COPY(xp, ap, alimbs); - x->nlimbs = alimbs; - x->flags = a->flags; - x->sign = asign; - } - - if (nlimbs && !nbits) { - /* Shift a full number of limbs. */ - mpi_lshift_limbs(x, nlimbs); - } else if (n) { - /* We use a very dump approach: Shift left by the number of - * limbs plus one and than fix it up by an rshift. - */ - mpi_lshift_limbs(x, nlimbs+1); - mpi_rshift(x, x, BITS_PER_MPI_LIMB - nbits); - } - - MPN_NORMALIZE(x->d, x->nlimbs); + return 0; } +EXPORT_SYMBOL_GPL(mpi_rshift); diff --git a/lib/crypto/mpi/mpi-cmp.c b/lib/crypto/mpi/mpi-cmp.c index 0835b6213235..ceaebe181cd7 100644 --- a/lib/crypto/mpi/mpi-cmp.c +++ b/lib/crypto/mpi/mpi-cmp.c @@ -45,54 +45,28 @@ int mpi_cmp_ui(MPI u, unsigned long v) } EXPORT_SYMBOL_GPL(mpi_cmp_ui); -static int do_mpi_cmp(MPI u, MPI v, int absmode) +int mpi_cmp(MPI u, MPI v) { - mpi_size_t usize; - mpi_size_t vsize; - int usign; - int vsign; + mpi_size_t usize, vsize; int cmp; mpi_normalize(u); mpi_normalize(v); - usize = u->nlimbs; vsize = v->nlimbs; - usign = absmode ? 0 : u->sign; - vsign = absmode ? 0 : v->sign; - - /* Compare sign bits. */ - - if (!usign && vsign) + if (!u->sign && v->sign) return 1; - if (usign && !vsign) + if (u->sign && !v->sign) return -1; - - /* U and V are either both positive or both negative. 
*/ - - if (usize != vsize && !usign && !vsign) + if (usize != vsize && !u->sign && !v->sign) return usize - vsize; - if (usize != vsize && usign && vsign) - return vsize + usize; + if (usize != vsize && u->sign && v->sign) + return vsize - usize; if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); - if (!cmp) - return 0; - if ((cmp < 0?1:0) == (usign?1:0)) - return 1; - - return -1; -} - -int mpi_cmp(MPI u, MPI v) -{ - return do_mpi_cmp(u, v, 0); + if (u->sign) + return -cmp; + return cmp; } EXPORT_SYMBOL_GPL(mpi_cmp); - -int mpi_cmpabs(MPI u, MPI v) -{ - return do_mpi_cmp(u, v, 1); -} -EXPORT_SYMBOL_GPL(mpi_cmpabs); diff --git a/lib/crypto/mpi/mpi-div.c b/lib/crypto/mpi/mpi-div.c index 45beab8b9e9e..6e5044e72595 100644 --- a/lib/crypto/mpi/mpi-div.c +++ b/lib/crypto/mpi/mpi-div.c @@ -14,13 +14,13 @@ #include "mpi-internal.h" #include "longlong.h" -void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den); -void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor); +int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den); -void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) +int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) { int divisor_sign = divisor->sign; MPI temp_divisor = NULL; + int err; /* We need the original value of the divisor after the remainder has been * preliminary calculated. We have to copy it to temporary space if it's @@ -28,44 +28,22 @@ void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) */ if (rem == divisor) { temp_divisor = mpi_copy(divisor); + if (!temp_divisor) + return -ENOMEM; divisor = temp_divisor; } - mpi_tdiv_r(rem, dividend, divisor); + err = mpi_tdiv_r(rem, dividend, divisor); + if (err) + goto free_temp_divisor; if (((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs) - mpi_add(rem, rem, divisor); + err = mpi_add(rem, rem, divisor); - if (temp_divisor) - mpi_free(temp_divisor); -} +free_temp_divisor: + mpi_free(temp_divisor); -void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor) -{ - MPI tmp = mpi_alloc(mpi_get_nlimbs(quot)); - mpi_fdiv_qr(quot, tmp, dividend, divisor); - mpi_free(tmp); -} - -void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor) -{ - int divisor_sign = divisor->sign; - MPI temp_divisor = NULL; - - if (quot == divisor || rem == divisor) { - temp_divisor = mpi_copy(divisor); - divisor = temp_divisor; - } - - mpi_tdiv_qr(quot, rem, dividend, divisor); - - if ((divisor_sign ^ dividend->sign) && rem->nlimbs) { - mpi_sub_ui(quot, quot, 1); - mpi_add(rem, rem, divisor); - } - - if (temp_divisor) - mpi_free(temp_divisor); + return err; } /* If den == quot, den needs temporary storage. @@ -75,12 +53,12 @@ void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor) * i.e no extra storage should be allocated. */ -void mpi_tdiv_r(MPI rem, MPI num, MPI den) +int mpi_tdiv_r(MPI rem, MPI num, MPI den) { - mpi_tdiv_qr(NULL, rem, num, den); + return mpi_tdiv_qr(NULL, rem, num, den); } -void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) +int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) { mpi_ptr_t np, dp; mpi_ptr_t qp, rp; @@ -93,13 +71,16 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) mpi_limb_t q_limb; mpi_ptr_t marker[5]; int markidx = 0; + int err; /* Ensure space is enough for quotient and remainder. * We need space for an extra limb in the remainder, because it's * up-shifted (normalized) below. */ rsize = nsize + 1; - mpi_resize(rem, rsize); + err = mpi_resize(rem, rsize); + if (err) + return err; qsize = rsize - dsize; /* qsize cannot be bigger than this. 
*/ if (qsize <= 0) { @@ -115,11 +96,14 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) quot->nlimbs = 0; quot->sign = 0; } - return; + return 0; } - if (quot) - mpi_resize(quot, qsize); + if (quot) { + err = mpi_resize(quot, qsize); + if (err) + return err; + } /* Read pointers here, when reallocation is finished. */ np = num->d; @@ -141,10 +125,10 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) rsize = rlimb != 0?1:0; rem->nlimbs = rsize; rem->sign = sign_remainder; - return; + return 0; } - + err = -ENOMEM; if (quot) { qp = quot->d; /* Make sure QP and NP point to different objects. Otherwise the @@ -152,6 +136,8 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) */ if (qp == np) { /* Copy NP object to temporary space. */ np = marker[markidx++] = mpi_alloc_limb_space(nsize); + if (!np) + goto out_free_marker; MPN_COPY(np, qp, nsize); } } else /* Put quotient at top of remainder. */ @@ -172,6 +158,8 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) * the original contents of the denominator. */ tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + if (!tp) + goto out_free_marker; mpihelp_lshift(tp, dp, dsize, normalization_steps); dp = tp; @@ -193,6 +181,8 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) mpi_ptr_t tp; tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + if (!tp) + goto out_free_marker; MPN_COPY(tp, dp, dsize); dp = tp; } @@ -227,8 +217,14 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) rem->nlimbs = rsize; rem->sign = sign_remainder; + + err = 0; + +out_free_marker: while (markidx) { markidx--; mpi_free_limb_space(marker[markidx]); } + + return err; } diff --git a/lib/crypto/mpi/mpi-internal.h b/lib/crypto/mpi/mpi-internal.h index 554002182db1..8a4f49e3043c 100644 --- a/lib/crypto/mpi/mpi-internal.h +++ b/lib/crypto/mpi/mpi-internal.h @@ -52,11 +52,12 @@ typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ -#define RESIZE_IF_NEEDED(a, b) \ - do { \ - if ((a)->alloced < (b)) \ - mpi_resize((a), (b)); \ - } while (0) +static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) +{ + if (a->alloced < b) + return mpi_resize(a, b); + return 0; +} /* Copy N limbs from S to D. */ #define MPN_COPY(d, s, n) \ @@ -66,14 +67,6 @@ typedef int mpi_size_t; /* (must be a signed type) */ (d)[_i] = (s)[_i]; \ } while (0) -#define MPN_COPY_INCR(d, s, n) \ - do { \ - mpi_size_t _i; \ - for (_i = 0; _i < (n); _i++) \ - (d)[_i] = (s)[_i]; \ - } while (0) - - #define MPN_COPY_DECR(d, s, n) \ do { \ mpi_size_t _i; \ @@ -181,8 +174,6 @@ int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace); -void mpihelp_mul_n(mpi_ptr_t prodp, - mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, diff --git a/lib/crypto/mpi/mpi-inv.c b/lib/crypto/mpi/mpi-inv.c deleted file mode 100644 index 61e37d18f793..000000000000 --- a/lib/crypto/mpi/mpi-inv.c +++ /dev/null @@ -1,143 +0,0 @@ -/* mpi-inv.c - MPI functions - * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. 
- * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "mpi-internal.h" - -/**************** - * Calculate the multiplicative inverse X of A mod N - * That is: Find the solution x for - * 1 = (a*x) mod n - */ -int mpi_invm(MPI x, MPI a, MPI n) -{ - /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) - * modified according to Michael Penk's solution for Exercise 35 - * with further enhancement - */ - MPI u, v, u1, u2 = NULL, u3, v1, v2 = NULL, v3, t1, t2 = NULL, t3; - unsigned int k; - int sign; - int odd; - - if (!mpi_cmp_ui(a, 0)) - return 0; /* Inverse does not exists. */ - if (!mpi_cmp_ui(n, 1)) - return 0; /* Inverse does not exists. */ - - u = mpi_copy(a); - v = mpi_copy(n); - - for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) { - mpi_rshift(u, u, 1); - mpi_rshift(v, v, 1); - } - odd = mpi_test_bit(v, 0); - - u1 = mpi_alloc_set_ui(1); - if (!odd) - u2 = mpi_alloc_set_ui(0); - u3 = mpi_copy(u); - v1 = mpi_copy(v); - if (!odd) { - v2 = mpi_alloc(mpi_get_nlimbs(u)); - mpi_sub(v2, u1, u); /* U is used as const 1 */ - } - v3 = mpi_copy(v); - if (mpi_test_bit(u, 0)) { /* u is odd */ - t1 = mpi_alloc_set_ui(0); - if (!odd) { - t2 = mpi_alloc_set_ui(1); - t2->sign = 1; - } - t3 = mpi_copy(v); - t3->sign = !t3->sign; - goto Y4; - } else { - t1 = mpi_alloc_set_ui(1); - if (!odd) - t2 = mpi_alloc_set_ui(0); - t3 = mpi_copy(u); - } - - do { - do { - if (!odd) { - if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) { - /* one is odd */ - mpi_add(t1, t1, v); - mpi_sub(t2, t2, u); - } - mpi_rshift(t1, t1, 1); - mpi_rshift(t2, t2, 1); - mpi_rshift(t3, t3, 1); - } else { - if (mpi_test_bit(t1, 0)) - mpi_add(t1, t1, v); - mpi_rshift(t1, t1, 1); - mpi_rshift(t3, t3, 1); - } -Y4: - ; - } while (!mpi_test_bit(t3, 0)); /* while t3 is even */ - - if (!t3->sign) { - mpi_set(u1, t1); - if (!odd) - mpi_set(u2, t2); - mpi_set(u3, t3); - } else { - mpi_sub(v1, v, t1); - sign = u->sign; u->sign = !u->sign; - if (!odd) - mpi_sub(v2, u, t2); - u->sign = sign; - sign = t3->sign; t3->sign = !t3->sign; - mpi_set(v3, t3); - t3->sign = sign; - } - mpi_sub(t1, u1, v1); - if (!odd) - mpi_sub(t2, u2, v2); - mpi_sub(t3, u3, v3); - if (t1->sign) { - mpi_add(t1, t1, v); - if (!odd) - mpi_sub(t2, t2, u); - } - } while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */ - /* mpi_lshift( u3, k ); */ - mpi_set(x, u1); - - mpi_free(u1); - mpi_free(v1); - mpi_free(t1); - if (!odd) { - mpi_free(u2); - mpi_free(v2); - mpi_free(t2); - } - mpi_free(u3); - mpi_free(v3); - mpi_free(t3); - - mpi_free(u); - mpi_free(v); - return 1; -} -EXPORT_SYMBOL_GPL(mpi_invm); diff --git a/lib/crypto/mpi/mpi-mod.c b/lib/crypto/mpi/mpi-mod.c index 54fcc01564d9..d5fdaec3d0b6 100644 --- a/lib/crypto/mpi/mpi-mod.c +++ b/lib/crypto/mpi/mpi-mod.c @@ -5,153 +5,9 @@ * This file is part of Libgcrypt. */ - #include "mpi-internal.h" -#include "longlong.h" - -/* Context used with Barrett reduction. 
*/ -struct barrett_ctx_s { - MPI m; /* The modulus - may not be modified. */ - int m_copied; /* If true, M needs to be released. */ - int k; - MPI y; - MPI r1; /* Helper MPI. */ - MPI r2; /* Helper MPI. */ - MPI r3; /* Helper MPI allocated on demand. */ -}; - - - -void mpi_mod(MPI rem, MPI dividend, MPI divisor) -{ - mpi_fdiv_r(rem, dividend, divisor); -} - -/* This function returns a new context for Barrett based operations on - * the modulus M. This context needs to be released using - * _gcry_mpi_barrett_free. If COPY is true M will be transferred to - * the context and the user may change M. If COPY is false, M may not - * be changed until gcry_mpi_barrett_free has been called. - */ -mpi_barrett_t mpi_barrett_init(MPI m, int copy) -{ - mpi_barrett_t ctx; - MPI tmp; - - mpi_normalize(m); - ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - if (copy) { - ctx->m = mpi_copy(m); - ctx->m_copied = 1; - } else - ctx->m = m; - - ctx->k = mpi_get_nlimbs(m); - tmp = mpi_alloc(ctx->k + 1); - - /* Barrett precalculation: y = floor(b^(2k) / m). */ - mpi_set_ui(tmp, 1); - mpi_lshift_limbs(tmp, 2 * ctx->k); - mpi_fdiv_q(tmp, tmp, m); - - ctx->y = tmp; - ctx->r1 = mpi_alloc(2 * ctx->k + 1); - ctx->r2 = mpi_alloc(2 * ctx->k + 1); - - return ctx; -} - -void mpi_barrett_free(mpi_barrett_t ctx) -{ - if (ctx) { - mpi_free(ctx->y); - mpi_free(ctx->r1); - mpi_free(ctx->r2); - if (ctx->r3) - mpi_free(ctx->r3); - if (ctx->m_copied) - mpi_free(ctx->m); - kfree(ctx); - } -} - - -/* R = X mod M - * - * Using Barrett reduction. Before using this function - * _gcry_mpi_barrett_init must have been called to do the - * precalculations. CTX is the context created by this precalculation - * and also conveys M. If the Barret reduction could no be done a - * straightforward reduction method is used. - * - * We assume that these conditions are met: - * Input: x =(x_2k-1 ...x_0)_b - * m =(m_k-1 ....m_0)_b with m_k-1 != 0 - * Output: r = x mod m - */ -void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx) -{ - MPI m = ctx->m; - int k = ctx->k; - MPI y = ctx->y; - MPI r1 = ctx->r1; - MPI r2 = ctx->r2; - int sign; - - mpi_normalize(x); - if (mpi_get_nlimbs(x) > 2*k) { - mpi_mod(r, x, m); - return; - } - - sign = x->sign; - x->sign = 0; - - /* 1. q1 = floor( x / b^k-1) - * q2 = q1 * y - * q3 = floor( q2 / b^k+1 ) - * Actually, we don't need qx, we can work direct on r2 - */ - mpi_set(r2, x); - mpi_rshift_limbs(r2, k-1); - mpi_mul(r2, r2, y); - mpi_rshift_limbs(r2, k+1); - - /* 2. r1 = x mod b^k+1 - * r2 = q3 * m mod b^k+1 - * r = r1 - r2 - * 3. if r < 0 then r = r + b^k+1 - */ - mpi_set(r1, x); - if (r1->nlimbs > k+1) /* Quick modulo operation. */ - r1->nlimbs = k+1; - mpi_mul(r2, r2, m); - if (r2->nlimbs > k+1) /* Quick modulo operation. */ - r2->nlimbs = k+1; - mpi_sub(r, r1, r2); - - if (mpi_has_sign(r)) { - if (!ctx->r3) { - ctx->r3 = mpi_alloc(k + 2); - mpi_set_ui(ctx->r3, 1); - mpi_lshift_limbs(ctx->r3, k + 1); - } - mpi_add(r, r, ctx->r3); - } - - /* 4. 
while r >= m do r = r - m */ - while (mpi_cmp(r, m) >= 0) - mpi_sub(r, r, m); - - x->sign = sign; -} - -void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx) +int mpi_mod(MPI rem, MPI dividend, MPI divisor) { - mpi_mul(w, u, v); - mpi_mod_barrett(w, w, ctx); + return mpi_fdiv_r(rem, dividend, divisor); } diff --git a/lib/crypto/mpi/mpi-mul.c b/lib/crypto/mpi/mpi-mul.c index 7f4eda8560dc..7e6ff1ce3e9b 100644 --- a/lib/crypto/mpi/mpi-mul.c +++ b/lib/crypto/mpi/mpi-mul.c @@ -13,7 +13,7 @@ #include "mpi-internal.h" -void mpi_mul(MPI w, MPI u, MPI v) +int mpi_mul(MPI w, MPI u, MPI v) { mpi_size_t usize, vsize, wsize; mpi_ptr_t up, vp, wp; @@ -21,6 +21,7 @@ void mpi_mul(MPI w, MPI u, MPI v) int usign, vsign, sign_product; int assign_wp = 0; mpi_ptr_t tmp_limb = NULL; + int err = 0; if (u->nlimbs < v->nlimbs) { /* Swap U and V. */ @@ -46,15 +47,21 @@ void mpi_mul(MPI w, MPI u, MPI v) if (w->alloced < wsize) { if (wp == up || wp == vp) { wp = mpi_alloc_limb_space(wsize); + if (!wp) + return -ENOMEM; assign_wp = 1; } else { - mpi_resize(w, wsize); + err = mpi_resize(w, wsize); + if (err) + return err; wp = w->d; } } else { /* Make U and V not overlap with W. */ if (wp == up) { /* W and U are identical. Allocate temporary space for U. */ up = tmp_limb = mpi_alloc_limb_space(usize); + if (!up) + return -ENOMEM; /* Is V identical too? Keep it identical with U. */ if (wp == vp) vp = up; @@ -63,6 +70,8 @@ void mpi_mul(MPI w, MPI u, MPI v) } else if (wp == vp) { /* W and V are identical. Allocate temporary space for V. */ vp = tmp_limb = mpi_alloc_limb_space(vsize); + if (!vp) + return -ENOMEM; /* Copy to the temporary space. */ MPN_COPY(vp, wp, vsize); } @@ -71,7 +80,12 @@ void mpi_mul(MPI w, MPI u, MPI v) if (!vsize) wsize = 0; else { - mpihelp_mul(wp, up, usize, vp, vsize, &cy); + err = mpihelp_mul(wp, up, usize, vp, vsize, &cy); + if (err) { + if (assign_wp) + mpi_free_limb_space(wp); + goto free_tmp_limb; + } wsize -= cy ? 
0:1; } @@ -79,14 +93,17 @@ void mpi_mul(MPI w, MPI u, MPI v) mpi_assign_limb_space(w, wp, wsize); w->nlimbs = wsize; w->sign = sign_product; + +free_tmp_limb: if (tmp_limb) mpi_free_limb_space(tmp_limb); + return err; } EXPORT_SYMBOL_GPL(mpi_mul); -void mpi_mulm(MPI w, MPI u, MPI v, MPI m) +int mpi_mulm(MPI w, MPI u, MPI v, MPI m) { - mpi_mul(w, u, v); - mpi_tdiv_r(w, w, m); + return mpi_mul(w, u, v) ?: + mpi_tdiv_r(w, w, m); } EXPORT_SYMBOL_GPL(mpi_mulm); diff --git a/lib/crypto/mpi/mpi-pow.c b/lib/crypto/mpi/mpi-pow.c index 2fd7a46d55ec..67fbd4c2503d 100644 --- a/lib/crypto/mpi/mpi-pow.c +++ b/lib/crypto/mpi/mpi-pow.c @@ -176,7 +176,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) for (;;) { while (c) { - mpi_ptr_t tp; mpi_size_t xsize; /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */ @@ -207,9 +206,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) xsize = msize; } - tp = rp; - rp = xp; - xp = tp; + swap(rp, xp); rsize = xsize; if ((mpi_limb_signed_t) e < 0) { @@ -235,9 +232,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) xsize = msize; } - tp = rp; - rp = xp; - xp = tp; + swap(rp, xp); rsize = xsize; } e <<= 1; diff --git a/lib/crypto/mpi/mpicoder.c b/lib/crypto/mpi/mpicoder.c index 3cb6bd148fa9..dde01030807d 100644 --- a/lib/crypto/mpi/mpicoder.c +++ b/lib/crypto/mpi/mpicoder.c @@ -25,7 +25,6 @@ #include <linux/string.h> #include "mpi-internal.h" -#define MAX_EXTERN_SCAN_BYTES (16*1024*1024) #define MAX_EXTERN_MPI_BITS 16384 /** @@ -110,112 +109,6 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); -/**************** - * Fill the mpi VAL from the hex string in STR. - */ -int mpi_fromstr(MPI val, const char *str) -{ - int sign = 0; - int prepend_zero = 0; - int i, j, c, c1, c2; - unsigned int nbits, nbytes, nlimbs; - mpi_limb_t a; - - if (*str == '-') { - sign = 1; - str++; - } - - /* Skip optional hex prefix. 
*/ - if (*str == '0' && str[1] == 'x') - str += 2; - - nbits = strlen(str); - if (nbits > MAX_EXTERN_SCAN_BYTES) { - mpi_clear(val); - return -EINVAL; - } - nbits *= 4; - if ((nbits % 8)) - prepend_zero = 1; - - nbytes = (nbits+7) / 8; - nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; - - if (val->alloced < nlimbs) - mpi_resize(val, nlimbs); - - i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); - i %= BYTES_PER_MPI_LIMB; - j = val->nlimbs = nlimbs; - val->sign = sign; - for (; j > 0; j--) { - a = 0; - for (; i < BYTES_PER_MPI_LIMB; i++) { - if (prepend_zero) { - c1 = '0'; - prepend_zero = 0; - } else - c1 = *str++; - - if (!c1) { - mpi_clear(val); - return -EINVAL; - } - c2 = *str++; - if (!c2) { - mpi_clear(val); - return -EINVAL; - } - if (c1 >= '0' && c1 <= '9') - c = c1 - '0'; - else if (c1 >= 'a' && c1 <= 'f') - c = c1 - 'a' + 10; - else if (c1 >= 'A' && c1 <= 'F') - c = c1 - 'A' + 10; - else { - mpi_clear(val); - return -EINVAL; - } - c <<= 4; - if (c2 >= '0' && c2 <= '9') - c |= c2 - '0'; - else if (c2 >= 'a' && c2 <= 'f') - c |= c2 - 'a' + 10; - else if (c2 >= 'A' && c2 <= 'F') - c |= c2 - 'A' + 10; - else { - mpi_clear(val); - return -EINVAL; - } - a <<= 8; - a |= c; - } - i = 0; - val->d[j-1] = a; - } - - return 0; -} -EXPORT_SYMBOL_GPL(mpi_fromstr); - -MPI mpi_scanval(const char *string) -{ - MPI a; - - a = mpi_alloc(0); - if (!a) - return NULL; - - if (mpi_fromstr(a, string)) { - mpi_free(a); - return NULL; - } - mpi_normalize(a); - return a; -} -EXPORT_SYMBOL_GPL(mpi_scanval); - static int count_lzeros(MPI a) { mpi_limb_t alimb; @@ -521,232 +414,3 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); - -/* Perform a two's complement operation on buffer P of size N bytes. */ -static void twocompl(unsigned char *p, unsigned int n) -{ - int i; - - for (i = n-1; i >= 0 && !p[i]; i--) - ; - if (i >= 0) { - if ((p[i] & 0x01)) - p[i] = (((p[i] ^ 0xfe) | 0x01) & 0xff); - else if ((p[i] & 0x02)) - p[i] = (((p[i] ^ 0xfc) | 0x02) & 0xfe); - else if ((p[i] & 0x04)) - p[i] = (((p[i] ^ 0xf8) | 0x04) & 0xfc); - else if ((p[i] & 0x08)) - p[i] = (((p[i] ^ 0xf0) | 0x08) & 0xf8); - else if ((p[i] & 0x10)) - p[i] = (((p[i] ^ 0xe0) | 0x10) & 0xf0); - else if ((p[i] & 0x20)) - p[i] = (((p[i] ^ 0xc0) | 0x20) & 0xe0); - else if ((p[i] & 0x40)) - p[i] = (((p[i] ^ 0x80) | 0x40) & 0xc0); - else - p[i] = 0x80; - - for (i--; i >= 0; i--) - p[i] ^= 0xff; - } -} - -int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, - size_t buflen, size_t *nwritten, MPI a) -{ - unsigned int nbits = mpi_get_nbits(a); - size_t len; - size_t dummy_nwritten; - int negative; - - if (!nwritten) - nwritten = &dummy_nwritten; - - /* Libgcrypt does no always care to set clear the sign if the value - * is 0. For printing this is a bit of a surprise, in particular - * because if some of the formats don't support negative numbers but - * should be able to print a zero. Thus we need this extra test - * for a negative number. - */ - if (a->sign && mpi_cmp_ui(a, 0)) - negative = 1; - else - negative = 0; - - len = buflen; - *nwritten = 0; - if (format == GCRYMPI_FMT_STD) { - unsigned char *tmp; - int extra = 0; - unsigned int n; - - tmp = mpi_get_buffer(a, &n, NULL); - if (!tmp) - return -EINVAL; - - if (negative) { - twocompl(tmp, n); - if (!(*tmp & 0x80)) { - /* Need to extend the sign. */ - n++; - extra = 2; - } - } else if (n && (*tmp & 0x80)) { - /* Positive but the high bit of the returned buffer is set. 
- * Thus we need to print an extra leading 0x00 so that the - * output is interpreted as a positive number. - */ - n++; - extra = 1; - } - - if (buffer && n > len) { - /* The provided buffer is too short. */ - kfree(tmp); - return -E2BIG; - } - if (buffer) { - unsigned char *s = buffer; - - if (extra == 1) - *s++ = 0; - else if (extra) - *s++ = 0xff; - memcpy(s, tmp, n-!!extra); - } - kfree(tmp); - *nwritten = n; - return 0; - } else if (format == GCRYMPI_FMT_USG) { - unsigned int n = (nbits + 7)/8; - - /* Note: We ignore the sign for this format. */ - /* FIXME: for performance reasons we should put this into - * mpi_aprint because we can then use the buffer directly. - */ - - if (buffer && n > len) - return -E2BIG; - if (buffer) { - unsigned char *tmp; - - tmp = mpi_get_buffer(a, &n, NULL); - if (!tmp) - return -EINVAL; - memcpy(buffer, tmp, n); - kfree(tmp); - } - *nwritten = n; - return 0; - } else if (format == GCRYMPI_FMT_PGP) { - unsigned int n = (nbits + 7)/8; - - /* The PGP format can only handle unsigned integers. */ - if (negative) - return -EINVAL; - - if (buffer && n+2 > len) - return -E2BIG; - - if (buffer) { - unsigned char *tmp; - unsigned char *s = buffer; - - s[0] = nbits >> 8; - s[1] = nbits; - - tmp = mpi_get_buffer(a, &n, NULL); - if (!tmp) - return -EINVAL; - memcpy(s+2, tmp, n); - kfree(tmp); - } - *nwritten = n+2; - return 0; - } else if (format == GCRYMPI_FMT_SSH) { - unsigned char *tmp; - int extra = 0; - unsigned int n; - - tmp = mpi_get_buffer(a, &n, NULL); - if (!tmp) - return -EINVAL; - - if (negative) { - twocompl(tmp, n); - if (!(*tmp & 0x80)) { - /* Need to extend the sign. */ - n++; - extra = 2; - } - } else if (n && (*tmp & 0x80)) { - n++; - extra = 1; - } - - if (buffer && n+4 > len) { - kfree(tmp); - return -E2BIG; - } - - if (buffer) { - unsigned char *s = buffer; - - *s++ = n >> 24; - *s++ = n >> 16; - *s++ = n >> 8; - *s++ = n; - if (extra == 1) - *s++ = 0; - else if (extra) - *s++ = 0xff; - memcpy(s, tmp, n-!!extra); - } - kfree(tmp); - *nwritten = 4+n; - return 0; - } else if (format == GCRYMPI_FMT_HEX) { - unsigned char *tmp; - int i; - int extra = 0; - unsigned int n = 0; - - tmp = mpi_get_buffer(a, &n, NULL); - if (!tmp) - return -EINVAL; - if (!n || (*tmp & 0x80)) - extra = 2; - - if (buffer && 2*n + extra + negative + 1 > len) { - kfree(tmp); - return -E2BIG; - } - if (buffer) { - unsigned char *s = buffer; - - if (negative) - *s++ = '-'; - if (extra) { - *s++ = '0'; - *s++ = '0'; - } - - for (i = 0; i < n; i++) { - unsigned int c = tmp[i]; - - *s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10; - c &= 15; - *s++ = c < 10 ? 
'0'+c : 'A'+c-10; - } - *s++ = 0; - *nwritten = s - buffer; - } else { - *nwritten = 2*n + extra + negative + 1; - } - kfree(tmp); - return 0; - } else - return -EINVAL; -} -EXPORT_SYMBOL_GPL(mpi_print); diff --git a/lib/crypto/mpi/mpih-mul.c b/lib/crypto/mpi/mpih-mul.c index e5f1c84e3c48..a93647564054 100644 --- a/lib/crypto/mpi/mpih-mul.c +++ b/lib/crypto/mpi/mpih-mul.c @@ -317,31 +317,6 @@ mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) } } - -void mpihelp_mul_n(mpi_ptr_t prodp, - mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) -{ - if (up == vp) { - if (size < KARATSUBA_THRESHOLD) - mpih_sqr_n_basecase(prodp, up, size); - else { - mpi_ptr_t tspace; - tspace = mpi_alloc_limb_space(2 * size); - mpih_sqr_n(prodp, up, size, tspace); - mpi_free_limb_space(tspace); - } - } else { - if (size < KARATSUBA_THRESHOLD) - mul_n_basecase(prodp, up, vp, size); - else { - mpi_ptr_t tspace; - tspace = mpi_alloc_limb_space(2 * size); - mul_n(prodp, up, vp, size, tspace); - mpi_free_limb_space(tspace); - } - } -} - int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, diff --git a/lib/crypto/mpi/mpiutil.c b/lib/crypto/mpi/mpiutil.c index aa8c46544af8..979ece5a81d2 100644 --- a/lib/crypto/mpi/mpiutil.c +++ b/lib/crypto/mpi/mpiutil.c @@ -20,63 +20,6 @@ #include "mpi-internal.h" -/* Constants allocated right away at startup. */ -static MPI constants[MPI_NUMBER_OF_CONSTANTS]; - -/* Initialize the MPI subsystem. This is called early and allows to - * do some initialization without taking care of threading issues. - */ -static int __init mpi_init(void) -{ - int idx; - unsigned long value; - - for (idx = 0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) { - switch (idx) { - case MPI_C_ZERO: - value = 0; - break; - case MPI_C_ONE: - value = 1; - break; - case MPI_C_TWO: - value = 2; - break; - case MPI_C_THREE: - value = 3; - break; - case MPI_C_FOUR: - value = 4; - break; - case MPI_C_EIGHT: - value = 8; - break; - default: - pr_err("MPI: invalid mpi_const selector %d\n", idx); - return -EFAULT; - } - constants[idx] = mpi_alloc_set_ui(value); - constants[idx]->flags = (16|32); - } - - return 0; -} -postcore_initcall(mpi_init); - -/* Return a constant MPI descripbed by NO which is one of the - * MPI_C_xxx macros. There is no need to copy this returned value; it - * may be used directly. - */ -MPI mpi_const(enum gcry_mpi_constants no) -{ - if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS) - pr_err("MPI: invalid mpi_const selector %d\n", no); - if (!constants[no]) - pr_err("MPI: MPI subsystem not initialized\n"); - return constants[no]; -} -EXPORT_SYMBOL_GPL(mpi_const); - /**************** * Note: It was a bad idea to use the number of limbs to allocate * because on a alpha the limbs are large but we normally need @@ -163,15 +106,6 @@ int mpi_resize(MPI a, unsigned nlimbs) return 0; } -void mpi_clear(MPI a) -{ - if (!a) - return; - a->nlimbs = 0; - a->flags = 0; -} -EXPORT_SYMBOL_GPL(mpi_clear); - void mpi_free(MPI a) { if (!a) @@ -199,6 +133,8 @@ MPI mpi_copy(MPI a) if (a) { b = mpi_alloc(a->nlimbs); + if (!b) + return NULL; b->nlimbs = a->nlimbs; b->sign = a->sign; b->flags = a->flags; @@ -210,121 +146,5 @@ MPI mpi_copy(MPI a) return b; } -/**************** - * This function allocates an MPI which is optimized to hold - * a value as large as the one given in the argument and allocates it - * with the same flags as A. 
- */ -MPI mpi_alloc_like(MPI a) -{ - MPI b; - - if (a) { - b = mpi_alloc(a->nlimbs); - b->nlimbs = 0; - b->sign = 0; - b->flags = a->flags; - } else - b = NULL; - - return b; -} - - -/* Set U into W and release U. If W is NULL only U will be released. */ -void mpi_snatch(MPI w, MPI u) -{ - if (w) { - mpi_assign_limb_space(w, u->d, u->alloced); - w->nlimbs = u->nlimbs; - w->sign = u->sign; - w->flags = u->flags; - u->alloced = 0; - u->nlimbs = 0; - u->d = NULL; - } - mpi_free(u); -} - - -MPI mpi_set(MPI w, MPI u) -{ - mpi_ptr_t wp, up; - mpi_size_t usize = u->nlimbs; - int usign = u->sign; - - if (!w) - w = mpi_alloc(mpi_get_nlimbs(u)); - RESIZE_IF_NEEDED(w, usize); - wp = w->d; - up = u->d; - MPN_COPY(wp, up, usize); - w->nlimbs = usize; - w->flags = u->flags; - w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ - w->sign = usign; - return w; -} -EXPORT_SYMBOL_GPL(mpi_set); - -MPI mpi_set_ui(MPI w, unsigned long u) -{ - if (!w) - w = mpi_alloc(1); - /* FIXME: If U is 0 we have no need to resize and thus possible - * allocating the limbs. - */ - RESIZE_IF_NEEDED(w, 1); - w->d[0] = u; - w->nlimbs = u ? 1 : 0; - w->sign = 0; - w->flags = 0; - return w; -} -EXPORT_SYMBOL_GPL(mpi_set_ui); - -MPI mpi_alloc_set_ui(unsigned long u) -{ - MPI w = mpi_alloc(1); - w->d[0] = u; - w->nlimbs = u ? 1 : 0; - w->sign = 0; - return w; -} - -/**************** - * Swap the value of A and B, when SWAP is 1. - * Leave the value when SWAP is 0. - * This implementation should be constant-time regardless of SWAP. - */ -void mpi_swap_cond(MPI a, MPI b, unsigned long swap) -{ - mpi_size_t i; - mpi_size_t nlimbs; - mpi_limb_t mask = ((mpi_limb_t)0) - swap; - mpi_limb_t x; - - if (a->alloced > b->alloced) - nlimbs = b->alloced; - else - nlimbs = a->alloced; - if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) - return; - - for (i = 0; i < nlimbs; i++) { - x = mask & (a->d[i] ^ b->d[i]); - a->d[i] = a->d[i] ^ x; - b->d[i] = b->d[i] ^ x; - } - - x = mask & (a->nlimbs ^ b->nlimbs); - a->nlimbs = a->nlimbs ^ x; - b->nlimbs = b->nlimbs ^ x; - - x = mask & (a->sign ^ b->sign); - a->sign = a->sign ^ x; - b->sign = b->sign ^ x; -} - MODULE_DESCRIPTION("Multiprecision maths library"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/poly1305-donna32.c b/lib/crypto/poly1305-donna32.c index 7fb71845cc84..0a4a2d99e365 100644 --- a/lib/crypto/poly1305-donna32.c +++ b/lib/crypto/poly1305-donna32.c @@ -7,7 +7,7 @@ */ #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/poly1305.h> void poly1305_core_setkey(struct poly1305_core_key *key, diff --git a/lib/crypto/poly1305-donna64.c b/lib/crypto/poly1305-donna64.c index 988702c9b3b2..530287531b2e 100644 --- a/lib/crypto/poly1305-donna64.c +++ b/lib/crypto/poly1305-donna64.c @@ -7,7 +7,7 @@ */ #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/internal/poly1305.h> void poly1305_core_setkey(struct poly1305_core_key *key, diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index 26d87fc3823e..6e80214ebad8 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -10,7 +10,7 @@ #include <crypto/internal/poly1305.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) @@ -76,3 +76,4 @@ EXPORT_SYMBOL_GPL(poly1305_final_generic); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); 
+MODULE_DESCRIPTION("Poly1305 authenticator algorithm, RFC7539"); diff --git a/lib/crypto/sha1.c b/lib/crypto/sha1.c index 1aebe7be9401..ebb60519ae93 100644 --- a/lib/crypto/sha1.c +++ b/lib/crypto/sha1.c @@ -12,7 +12,7 @@ #include <linux/bitops.h> #include <linux/string.h> #include <crypto/sha1.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* * If you have 32 registers or more, the compiler can (and should) @@ -137,4 +137,5 @@ void sha1_init(__u32 *buf) } EXPORT_SYMBOL(sha1_init); +MODULE_DESCRIPTION("SHA-1 Algorithm"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index 3ac1ef8677db..04c1f2557e6c 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -11,7 +11,7 @@ * Copyright (c) 2014 Red Hat Inc. */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/sha256_base.h> #include <linux/kernel.h> #include <linux/module.h> @@ -165,4 +165,5 @@ void sha256(const u8 *data, unsigned int len, u8 *out) } EXPORT_SYMBOL(sha256); +MODULE_DESCRIPTION("SHA-256 Algorithm"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/simd.c b/lib/crypto/simd.c new file mode 100644 index 000000000000..9c36cb3bb49c --- /dev/null +++ b/lib/crypto/simd.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SIMD testing utility functions + * + * Copyright 2024 Google LLC + */ + +#include <crypto/internal/simd.h> + +DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); +EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); diff --git a/lib/crypto/utils.c b/lib/crypto/utils.c index c852c7151b0a..87da2a6dd161 100644 --- a/lib/crypto/utils.c +++ b/lib/crypto/utils.c @@ -5,7 +5,7 @@ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/utils.h> #include <linux/module.h> @@ -85,4 +85,5 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len) } EXPORT_SYMBOL_GPL(__crypto_xor); +MODULE_DESCRIPTION("Crypto library utility functions"); MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index fb12a9bacd2f..7f50c4480a4e 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,25 +7,30 @@ #define pr_fmt(fmt) "ODEBUG: " fmt +#include <linux/cpu.h> #include <linux/debugobjects.h> -#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <linux/hash.h> +#include <linux/kmemleak.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/sched/task_stack.h> #include <linux/seq_file.h> -#include <linux/debugfs.h> #include <linux/slab.h> -#include <linux/hash.h> -#include <linux/kmemleak.h> -#include <linux/cpu.h> +#include <linux/static_key.h> #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 1024 -#define ODEBUG_POOL_MIN_LEVEL 256 -#define ODEBUG_POOL_PERCPU_SIZE 64 +/* Must be power of two */ #define ODEBUG_BATCH_SIZE 16 +/* Initial values. Must all be a multiple of batch size */ +#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE) +#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4) + +#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE) + #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT #define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) #define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) @@ -35,7 +40,7 @@ * frequency of 10Hz and about 1024 objects for each freeing operation. * So it is freeing at most 10k debug objects per second. 
*/ -#define ODEBUG_FREE_WORK_MAX 1024 +#define ODEBUG_FREE_WORK_MAX (1024 / ODEBUG_BATCH_SIZE) #define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10) struct debug_bucket { @@ -43,16 +48,24 @@ struct debug_bucket { raw_spinlock_t lock; }; -/* - * Debug object percpu free list - * Access is protected by disabling irq - */ -struct debug_percpu_free { - struct hlist_head free_objs; - int obj_free; +struct pool_stats { + unsigned int cur_used; + unsigned int max_used; + unsigned int min_fill; }; -static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); +struct obj_pool { + struct hlist_head objects; + unsigned int cnt; + unsigned int min_cnt; + unsigned int max_cnt; + struct pool_stats stats; +} ____cacheline_aligned; + + +static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = { + .max_cnt = ODEBUG_POOL_PERCPU_SIZE, +}; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; @@ -60,59 +73,56 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); -static HLIST_HEAD(obj_pool); -static HLIST_HEAD(obj_to_free); +static struct obj_pool pool_global = { + .min_cnt = ODEBUG_POOL_MIN_LEVEL, + .max_cnt = ODEBUG_POOL_SIZE, + .stats = { + .min_fill = ODEBUG_POOL_SIZE, + }, +}; -/* - * Because of the presence of percpu free pools, obj_pool_free will - * under-count those in the percpu free pools. Similarly, obj_pool_used - * will over-count those in the percpu free pools. Adjustments will be - * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used - * can be off. - */ -static int obj_pool_min_free = ODEBUG_POOL_SIZE; -static int obj_pool_free = ODEBUG_POOL_SIZE; -static int obj_pool_used; -static int obj_pool_max_used; +static struct obj_pool pool_to_free = { + .max_cnt = UINT_MAX, +}; + +static HLIST_HEAD(pool_boot); + +static unsigned long avg_usage; static bool obj_freeing; -/* The number of objs on the global free list */ -static int obj_nr_tofree; - -static int debug_objects_maxchain __read_mostly; -static int __maybe_unused debug_objects_maxchecked __read_mostly; -static int debug_objects_fixups __read_mostly; -static int debug_objects_warnings __read_mostly; -static int debug_objects_enabled __read_mostly - = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; -static int debug_objects_pool_size __read_mostly - = ODEBUG_POOL_SIZE; -static int debug_objects_pool_min_level __read_mostly - = ODEBUG_POOL_MIN_LEVEL; -static const struct debug_obj_descr *descr_test __read_mostly; -static struct kmem_cache *obj_cache __ro_after_init; + +static int __data_racy debug_objects_maxchain __read_mostly; +static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly; +static int __data_racy debug_objects_fixups __read_mostly; +static int __data_racy debug_objects_warnings __read_mostly; +static bool __data_racy debug_objects_enabled __read_mostly + = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; + +static const struct debug_obj_descr *descr_test __read_mostly; +static struct kmem_cache *obj_cache __ro_after_init; /* * Track numbers of kmem_cache_alloc()/free() calls done. 
*/ -static int debug_objects_allocated; -static int debug_objects_freed; +static int __data_racy debug_objects_allocated; +static int __data_racy debug_objects_freed; static void free_obj_work(struct work_struct *work); static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work); +static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled); + static int __init enable_object_debug(char *str) { - debug_objects_enabled = 1; + debug_objects_enabled = true; return 0; } +early_param("debug_objects", enable_object_debug); static int __init disable_object_debug(char *str) { - debug_objects_enabled = 0; + debug_objects_enabled = false; return 0; } - -early_param("debug_objects", enable_object_debug); early_param("no_debug_objects", disable_object_debug); static const char *obj_states[ODEBUG_STATE_MAX] = { @@ -124,62 +134,280 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static void fill_pool(void) +static __always_inline unsigned int pool_count(struct obj_pool *pool) +{ + return READ_ONCE(pool->cnt); +} + +static __always_inline bool pool_should_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt; +} + +static __always_inline bool pool_must_refill(struct obj_pool *pool) { - gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; + return pool_count(pool) < pool->min_cnt / 2; +} + +static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src) +{ + struct hlist_node *last, *next_batch, *first_batch; struct debug_obj *obj; - unsigned long flags; - if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) + if (dst->cnt >= dst->max_cnt || !src->cnt) + return false; + + first_batch = src->objects.first; + obj = hlist_entry(first_batch, typeof(*obj), node); + last = obj->batch_last; + next_batch = last->next; + + /* Move the next batch to the front of the source pool */ + src->objects.first = next_batch; + if (next_batch) + next_batch->pprev = &src->objects.first; + + /* Add the extracted batch to the destination pool */ + last->next = dst->objects.first; + if (last->next) + last->next->pprev = &last->next; + first_batch->pprev = &dst->objects.first; + dst->objects.first = first_batch; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head) +{ + struct hlist_node *last; + struct debug_obj *obj; + + if (dst->cnt >= dst->max_cnt) + return false; + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + + hlist_splice_init(head, last, &dst->objects); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src) +{ + struct hlist_node *last, *next; + struct debug_obj *obj; + + if (!src->cnt) + return false; + + /* Move the complete list to the head */ + hlist_move_list(&src->objects, head); + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + next = last->next; + /* Disconnect the batch from the list */ + last->next = NULL; + + /* Move the node after last back to the source pool. 
*/ + src->objects.first = next; + if (next) + next->pprev = &src->objects.first; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + return true; +} + +static struct debug_obj *__alloc_object(struct hlist_head *list) +{ + struct debug_obj *obj; + + if (unlikely(!list->first)) + return NULL; + + obj = hlist_entry(list->first, typeof(*obj), node); + hlist_del(&obj->node); + return obj; +} + +static void pcpu_refill_stats(void) +{ + struct pool_stats *stats = &pool_global.stats; + + WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE); + + if (stats->cur_used > stats->max_used) + stats->max_used = stats->cur_used; + + if (pool_global.cnt < stats->min_fill) + stats->min_fill = pool_global.cnt; +} + +static struct debug_obj *pcpu_alloc(void) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + + lockdep_assert_irqs_disabled(); + + for (;;) { + struct debug_obj *obj = __alloc_object(&pcp->objects); + + if (likely(obj)) { + pcp->cnt--; + /* + * If this emptied a batch try to refill from the + * free pool. Don't do that if this was the top-most + * batch as pcpu_free() expects the per CPU pool + * to be less than ODEBUG_POOL_PERCPU_SIZE. + */ + if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) && + !(pcp->cnt % ODEBUG_BATCH_SIZE))) { + /* + * Don't try to allocate from the regular pool here + * to not exhaust it prematurely. + */ + if (pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + pool_move_batch(pcp, &pool_to_free); + pcpu_refill_stats(); + } + } + return obj; + } + + guard(raw_spinlock)(&pool_lock); + if (!pool_move_batch(pcp, &pool_to_free)) { + if (!pool_move_batch(pcp, &pool_global)) + return NULL; + } + pcpu_refill_stats(); + } +} + +static void pcpu_free(struct debug_obj *obj) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + struct debug_obj *first; + + lockdep_assert_irqs_disabled(); + + if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) { + obj->batch_last = &obj->node; + } else { + first = hlist_entry(pcp->objects.first, typeof(*first), node); + obj->batch_last = first->batch_last; + } + hlist_add_head(&obj->node, &pcp->objects); + pcp->cnt++; + + /* Pool full ? */ + if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE) + return; + + /* Remove a batch from the per CPU pool */ + guard(raw_spinlock)(&pool_lock); + /* Try to fit the batch into the pool_global first */ + if (!pool_move_batch(&pool_global, pcp)) + pool_move_batch(&pool_to_free, pcp); + WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE); +} + +static void free_object_list(struct hlist_head *head) +{ + struct hlist_node *tmp; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry_safe(obj, tmp, head, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + cnt++; + } + debug_objects_freed += cnt; +} + +static void fill_pool_from_freelist(void) +{ + static unsigned long state; + + /* + * Reuse objs from the global obj_to_free list; they will be + * reinitialized when allocating. + */ + if (!pool_count(&pool_to_free)) return; /* - * Reuse objs from the global free list; they will be reinitialized - * when allocating. - * - * Both obj_nr_tofree and obj_pool_free are checked locklessly; the - * READ_ONCE()s pair with the WRITE_ONCE()s in pool_lock critical - * sections. 
+ * Prevent the context from being scheduled or interrupted after + * setting the state flag; */ - while (READ_ONCE(obj_nr_tofree) && (READ_ONCE(obj_pool_free) < obj_pool_min_free)) { - raw_spin_lock_irqsave(&pool_lock, flags); - /* - * Recheck with the lock held as the worker thread might have - * won the race and freed the global free list already. - */ - while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + guard(irqsave)(); + + /* + * Avoid lock contention on &pool_lock and avoid making the cache + * line exclusive by testing the bit before attempting to set it. + */ + if (test_bit(0, &state) || test_and_set_bit(0, &state)) + return; + + /* Avoid taking the lock when there is no work to do */ + while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + /* Move a batch if possible */ + pool_move_batch(&pool_global, &pool_to_free); + } + clear_bit(0, &state); +} + +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp) +{ + struct hlist_node *last = NULL; + struct debug_obj *obj; + + for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { + obj = kmem_cache_zalloc(cache, gfp); + if (!obj) { + free_object_list(head); + return false; } - raw_spin_unlock_irqrestore(&pool_lock, flags); + debug_objects_allocated++; + + if (!last) + last = &obj->node; + obj->batch_last = last; + + hlist_add_head(&obj->node, head); } + return true; +} + +static void fill_pool(void) +{ + static atomic_t cpus_allocating; - if (unlikely(!obj_cache)) + /* + * Avoid allocation and lock contention when: + * - One other CPU is already allocating + * - the global pool has not reached the critical level yet + */ + if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating)) return; - while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { - struct debug_obj *new[ODEBUG_BATCH_SIZE]; - int cnt; + atomic_inc(&cpus_allocating); + while (pool_should_refill(&pool_global)) { + HLIST_HEAD(head); - for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { - new[cnt] = kmem_cache_zalloc(obj_cache, gfp); - if (!new[cnt]) - break; - } - if (!cnt) - return; + if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + break; - raw_spin_lock_irqsave(&pool_lock, flags); - while (cnt) { - hlist_add_head(&new[--cnt]->node, &obj_pool); - debug_objects_allocated++; - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); + guard(raw_spinlock_irqsave)(&pool_lock); + if (!pool_push_batch(&pool_global, &head)) + pool_push_batch(&pool_to_free, &head); } + atomic_dec(&cpus_allocating); } /* @@ -201,72 +429,37 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) return NULL; } -/* - * Allocate a new object from the hlist - */ -static struct debug_obj *__alloc_object(struct hlist_head *list) +static void calc_usage(void) { - struct debug_obj *obj = NULL; + static DEFINE_RAW_SPINLOCK(avg_lock); + static unsigned long avg_period; + unsigned long cur, now = jiffies; - if (list->first) { - obj = hlist_entry(list->first, typeof(*obj), node); - hlist_del(&obj->node); - } + if (!time_after_eq(now, READ_ONCE(avg_period))) + return; - return obj; + if (!raw_spin_trylock(&avg_lock)) + return; + + WRITE_ONCE(avg_period, now + msecs_to_jiffies(10)); + cur = 
READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX; + WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur)); + raw_spin_unlock(&avg_lock); } -static struct debug_obj * -alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) +static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b, + const struct debug_obj_descr *descr) { - struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool); struct debug_obj *obj; - if (likely(obj_cache)) { - obj = __alloc_object(&percpu_pool->free_objs); - if (obj) { - percpu_pool->obj_free--; - goto init_obj; - } - } - - raw_spin_lock(&pool_lock); - obj = __alloc_object(&obj_pool); - if (obj) { - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - - /* - * Looking ahead, allocate one batch of debug objects and - * put them into the percpu free pool. - */ - if (likely(obj_cache)) { - int i; - - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - struct debug_obj *obj2; - - obj2 = __alloc_object(&obj_pool); - if (!obj2) - break; - hlist_add_head(&obj2->node, - &percpu_pool->free_objs); - percpu_pool->obj_free++; - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - } - } + calc_usage(); - if (obj_pool_used > obj_pool_max_used) - obj_pool_max_used = obj_pool_used; - - if (obj_pool_free < obj_pool_min_free) - obj_pool_min_free = obj_pool_free; - } - raw_spin_unlock(&pool_lock); + if (static_branch_likely(&obj_cache_enabled)) + obj = pcpu_alloc(); + else + obj = __alloc_object(&pool_boot); -init_obj: - if (obj) { + if (likely(obj)) { obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; @@ -276,142 +469,58 @@ init_obj: return obj; } -/* - * workqueue function to free objects. - * - * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. - */ +/* workqueue function to free objects. */ static void free_obj_work(struct work_struct *work) { - struct hlist_node *tmp; - struct debug_obj *obj; - unsigned long flags; - HLIST_HEAD(tofree); + static unsigned long last_use_avg; + unsigned long cur_used, last_used, delta; + unsigned int max_free = 0; WRITE_ONCE(obj_freeing, false); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; - if (obj_pool_free >= debug_objects_pool_size) - goto free_objs; + /* Rate limit freeing based on current use average */ + cur_used = READ_ONCE(avg_usage); + last_used = last_use_avg; + last_use_avg = cur_used; - /* - * The objs on the pool list might be allocated before the work is - * run, so recheck if pool list it full or not, if not fill pool - * list from the global free list. As it is likely that a workload - * may be gearing up to use more and more objects, don't free any - * of them until the next round. - */ - while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); - return; + if (!pool_count(&pool_to_free)) + return; -free_objs: - /* - * Pool list is already full and there are still objs on the free - * list. Move remaining free objs to a temporary list to free the - * memory outside the pool_lock held region. 
- */ - if (obj_nr_tofree) { - hlist_move_list(&obj_to_free, &tofree); - debug_objects_freed += obj_nr_tofree; - WRITE_ONCE(obj_nr_tofree, 0); + if (cur_used <= last_used) { + delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX; + max_free = min(delta, ODEBUG_FREE_WORK_MAX); } - raw_spin_unlock_irqrestore(&pool_lock, flags); - hlist_for_each_entry_safe(obj, tmp, &tofree, node) { - hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) { + HLIST_HEAD(tofree); + + /* Acquire and drop the lock for each batch */ + scoped_guard(raw_spinlock_irqsave, &pool_lock) { + if (!pool_to_free.cnt) + return; + + /* Refill the global pool if possible */ + if (pool_move_batch(&pool_global, &pool_to_free)) { + /* Don't free as there seems to be demand */ + max_free = 0; + } else if (max_free) { + pool_pop_batch(&tofree, &pool_to_free); + max_free--; + } else { + return; + } + } + free_object_list(&tofree); } } static void __free_object(struct debug_obj *obj) { - struct debug_obj *objs[ODEBUG_BATCH_SIZE]; - struct debug_percpu_free *percpu_pool; - int lookahead_count = 0; - unsigned long flags; - bool work; - - local_irq_save(flags); - if (!obj_cache) - goto free_to_obj_pool; - - /* - * Try to free it into the percpu pool first. - */ - percpu_pool = this_cpu_ptr(&percpu_obj_pool); - if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) { - hlist_add_head(&obj->node, &percpu_pool->free_objs); - percpu_pool->obj_free++; - local_irq_restore(flags); - return; - } - - /* - * As the percpu pool is full, look ahead and pull out a batch - * of objects from the percpu pool and free them as well. - */ - for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) { - objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs); - if (!objs[lookahead_count]) - break; - percpu_pool->obj_free--; - } - -free_to_obj_pool: - raw_spin_lock(&pool_lock); - work = (obj_pool_free > debug_objects_pool_size) && obj_cache && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX); - obj_pool_used--; - - if (work) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - hlist_add_head(&obj->node, &obj_to_free); - if (lookahead_count) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_to_free); - } - } - - if ((obj_pool_free > debug_objects_pool_size) && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) { - int i; - - /* - * Free one more batch of objects from obj_pool. 
- */ - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - obj = __alloc_object(&obj_pool); - hlist_add_head(&obj->node, &obj_to_free); - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - } - } - } else { - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - hlist_add_head(&obj->node, &obj_pool); - if (lookahead_count) { - WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_pool); - } - } - } - raw_spin_unlock(&pool_lock); - local_irq_restore(flags); + guard(irqsave)(); + if (static_branch_likely(&obj_cache_enabled)) + pcpu_free(obj); + else + hlist_add_head(&obj->node, &pool_boot); } /* @@ -421,63 +530,52 @@ free_to_obj_pool: static void free_object(struct debug_obj *obj) { __free_object(obj); - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } } -#ifdef CONFIG_HOTPLUG_CPU -static int object_cpu_offline(unsigned int cpu) +static void put_objects(struct hlist_head *list) { - struct debug_percpu_free *percpu_pool; struct hlist_node *tmp; struct debug_obj *obj; - unsigned long flags; - /* Remote access is safe as the CPU is dead already */ - percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); - hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) { + /* + * Using free_object() puts the objects into reuse or schedules + * them for freeing and it get's all the accounting correct. + */ + hlist_for_each_entry_safe(obj, tmp, list, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + free_object(obj); } +} - raw_spin_lock_irqsave(&pool_lock, flags); - obj_pool_used -= percpu_pool->obj_free; - debug_objects_freed += percpu_pool->obj_free; - raw_spin_unlock_irqrestore(&pool_lock, flags); - - percpu_pool->obj_free = 0; +#ifdef CONFIG_HOTPLUG_CPU +static int object_cpu_offline(unsigned int cpu) +{ + /* Remote access is safe as the CPU is dead already */ + struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu); + put_objects(&pcp->objects); + pcp->cnt = 0; return 0; } #endif -/* - * We run out of memory. That means we probably have tons of objects - * allocated. - */ +/* Out of memory. Free all objects from hash */ static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *tmp; HLIST_HEAD(freelist); - struct debug_obj *obj; - unsigned long flags; - int i; pr_warn("Out of memory. ODEBUG disabled\n"); - for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - raw_spin_lock_irqsave(&db->lock, flags); - hlist_move_list(&db->list, &freelist); - raw_spin_unlock_irqrestore(&db->lock, flags); + for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + scoped_guard(raw_spinlock_irqsave, &db->lock) + hlist_move_list(&db->list, &freelist); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } + put_objects(&freelist); } } @@ -592,12 +690,24 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket } /* Out of memory. 
Do the cleanup outside of the locked region */ - debug_objects_enabled = 0; + debug_objects_enabled = false; return NULL; } static void debug_objects_fill_pool(void) { + if (!static_branch_likely(&obj_cache_enabled)) + return; + + if (likely(!pool_should_refill(&pool_global))) + return; + + /* Try reusing objects from obj_to_free_list */ + fill_pool_from_freelist(); + + if (likely(!pool_should_refill(&pool_global))) + return; + /* * On RT enabled kernels the pool refill must happen in preemptible * context -- for !RT kernels we rely on the fact that spinlock_t and @@ -1007,7 +1117,7 @@ repeat: debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1024,23 +1134,33 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { - int cpu, obj_percpu_free = 0; + unsigned int cpu, pool_used, pcp_free = 0; + /* + * pool_global.stats.cur_used is the number of batches currently + * handed out to per CPU pools. Convert it to number of objects + * and subtract the number of free objects in the per CPU pools. + * As this is lockless the number is an estimate. + */ for_each_possible_cpu(cpu) - obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu); - - seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); - seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); - seq_printf(m, "warnings :%d\n", debug_objects_warnings); - seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); - seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); - seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); - seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); - seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); - seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); - seq_printf(m, "objs_freed :%d\n", debug_objects_freed); + pcp_free += per_cpu(pool_pcpu.cnt, cpu); + + pool_used = READ_ONCE(pool_global.stats.cur_used); + pcp_free = min(pool_used, pcp_free); + pool_used -= pcp_free; + + seq_printf(m, "max_chain : %d\n", debug_objects_maxchain); + seq_printf(m, "max_checked : %d\n", debug_objects_maxchecked); + seq_printf(m, "warnings : %d\n", debug_objects_warnings); + seq_printf(m, "fixups : %d\n", debug_objects_fixups); + seq_printf(m, "pool_free : %u\n", pool_count(&pool_global) + pcp_free); + seq_printf(m, "pool_pcp_free : %u\n", pcp_free); + seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill)); + seq_printf(m, "pool_used : %u\n", pool_used); + seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used)); + seq_printf(m, "on_free_list : %u\n", pool_count(&pool_to_free)); + seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated); + seq_printf(m, "objs_freed : %d\n", debug_objects_freed); return 0; } DEFINE_SHOW_ATTRIBUTE(debug_stats); @@ -1194,7 +1314,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) out: raw_spin_unlock_irqrestore(&db->lock, flags); if (res) - debug_objects_enabled = 0; + debug_objects_enabled = false; return res; } @@ -1209,7 +1329,7 @@ static __initconst const struct debug_obj_descr descr_type_test = 
{ static __initdata struct self_test obj = { .static_init = 0 }; -static void __init debug_objects_selftest(void) +static bool __init debug_objects_selftest(void) { int fixups, oldfixups, warnings, oldwarnings; unsigned long flags; @@ -1278,9 +1398,10 @@ out: descr_test = NULL; local_irq_restore(flags); + return debug_objects_enabled; } #else -static inline void debug_objects_selftest(void) { } +static inline bool debug_objects_selftest(void) { return true; } #endif /* @@ -1295,65 +1416,54 @@ void __init debug_objects_early_init(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++) raw_spin_lock_init(&obj_hash[i].lock); + /* Keep early boot simple and add everything to the boot list */ for (i = 0; i < ODEBUG_POOL_SIZE; i++) - hlist_add_head(&obj_static_pool[i].node, &obj_pool); + hlist_add_head(&obj_static_pool[i].node, &pool_boot); } /* - * Convert the statically allocated objects to dynamic ones: + * Convert the statically allocated objects to dynamic ones. + * debug_objects_mem_init() is called early so only one CPU is up and + * interrupts are disabled, which means it is safe to replace the active + * object references. */ -static int __init debug_objects_replace_static_objects(void) +static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache) { struct debug_bucket *db = obj_hash; struct hlist_node *tmp; - struct debug_obj *obj, *new; + struct debug_obj *obj; HLIST_HEAD(objects); - int i, cnt = 0; + int i; - for (i = 0; i < ODEBUG_POOL_SIZE; i++) { - obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); - if (!obj) + for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) { + if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL)) goto free; - hlist_add_head(&obj->node, &objects); + pool_push_batch(&pool_global, &objects); } - debug_objects_allocated += i; - - /* - * debug_objects_mem_init() is now called early that only one CPU is up - * and interrupts have been disabled, so it is safe to replace the - * active object references. - */ - - /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) - hlist_del(&obj->node); - /* Move the allocated objects to the pool */ - hlist_move_list(&objects, &obj_pool); + /* Disconnect the boot pool. */ + pool_boot.first = NULL; /* Replace the active object references */ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); hlist_for_each_entry(obj, &objects, node) { - new = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&new->node); + struct debug_obj *new = pcpu_alloc(); + /* copy object data */ *new = *obj; hlist_add_head(&new->node, &db->list); - cnt++; } } - - pr_debug("%d of %d active objects replaced\n", - cnt, obj_pool_used); - return 0; + return true; free: - hlist_for_each_entry_safe(obj, tmp, &objects, node) { + /* Can't use free_object_list() as the cache is not populated yet */ + hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + kmem_cache_free(cache, obj); } - return -ENOMEM; + return false; } /* @@ -1364,43 +1474,40 @@ free: */ void __init debug_objects_mem_init(void) { - int cpu, extras; + struct kmem_cache *cache; + int extras; if (!debug_objects_enabled) return; - /* - * Initialize the percpu object pools - * - * Initialization is not strictly necessary, but was done for - * completeness. 
- */ - for_each_possible_cpu(cpu) - INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu)); + if (!debug_objects_selftest()) + return; - obj_cache = kmem_cache_create("debug_objects_cache", - sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, - NULL); + cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL); - if (!obj_cache || debug_objects_replace_static_objects()) { - debug_objects_enabled = 0; - kmem_cache_destroy(obj_cache); - pr_warn("out of memory.\n"); + if (!cache || !debug_objects_replace_static_objects(cache)) { + debug_objects_enabled = false; + pr_warn("Out of memory.\n"); return; - } else - debug_objects_selftest(); - -#ifdef CONFIG_HOTPLUG_CPU - cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, - object_cpu_offline); -#endif + } /* - * Increase the thresholds for allocating and freeing objects - * according to the number of possible CPUs available in the system. + * Adjust the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the + * system. */ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE; - debug_objects_pool_size += extras; - debug_objects_pool_min_level += extras; + pool_global.max_cnt += extras; + pool_global.min_cnt += extras; + + /* Everything worked. Expose the cache */ + obj_cache = cache; + static_branch_enable(&obj_cache_enabled); + +#ifdef CONFIG_HOTPLUG_CPU + cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, + object_cpu_offline); +#endif + return; } diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 3518e7394eca..ca736166f100 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -232,7 +232,8 @@ static int INIT get_next_block(struct bunzip_data *bd) RUNB) */ symCount = symTotal+2; for (j = 0; j < groupCount; j++) { - unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + unsigned char length[MAX_SYMBOLS]; + unsigned short temp[MAX_HUFCODE_BITS+1]; int minLen, maxLen, pp; /* Read Huffman code lengths for each symbol. They're stored in a way similar to mtf; record a starting diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index e6327391b6b6..c0dbb3cea915 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -16,7 +16,7 @@ #include <linux/decompress/mm.h> #include <linux/compiler.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* * Note: Uncompressed chunk size is used in the compressor side diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 20a858031f12..9d34d35908da 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -37,7 +37,9 @@ #include <linux/decompress/mm.h> +#ifndef MIN #define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) +#endif static long long INIT read_int(unsigned char *ptr, int size) { diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 64c1358500ce..57a9e93743e1 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -28,7 +28,7 @@ #include <linux/decompress/mm.h> #include <linux/compiler.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> static const unsigned char lzop_magic[] = { 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 842894158944..32138bb8ef77 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ /* @@ -103,12 +102,11 @@ #ifdef STATIC # define XZ_PREBOOT #else -#include <linux/decompress/unxz.h> +# include <linux/decompress/unxz.h> #endif #ifdef __KERNEL__ # include <linux/decompress/mm.h> #endif -#define XZ_EXTERN STATIC #ifndef XZ_PREBOOT # include <linux/slab.h> @@ -127,11 +125,21 @@ #ifdef CONFIG_X86 # define XZ_DEC_X86 #endif -#ifdef CONFIG_PPC +#if defined(CONFIG_PPC) && defined(CONFIG_CPU_BIG_ENDIAN) # define XZ_DEC_POWERPC #endif #ifdef CONFIG_ARM -# define XZ_DEC_ARM +# ifdef CONFIG_THUMB2_KERNEL +# define XZ_DEC_ARMTHUMB +# else +# define XZ_DEC_ARM +# endif +#endif +#ifdef CONFIG_ARM64 +# define XZ_DEC_ARM64 +#endif +#ifdef CONFIG_RISCV +# define XZ_DEC_RISCV #endif #ifdef CONFIG_SPARC # define XZ_DEC_SPARC @@ -220,7 +228,7 @@ void *memmove(void *dest, const void *src, size_t size) #endif /* - * Since we need memmove anyway, would use it as memcpy too. + * Since we need memmove anyway, we could use it as memcpy too. * Commented out for now to avoid breaking things. */ /* @@ -390,17 +398,17 @@ error_alloc_state: } /* - * This macro is used by architecture-specific files to decompress + * This function is used by architecture-specific files to decompress * the kernel image. 
*/ #ifdef XZ_PREBOOT -STATIC int INIT __decompress(unsigned char *buf, long len, - long (*fill)(void*, unsigned long), - long (*flush)(void*, unsigned long), - unsigned char *out_buf, long olen, - long *pos, - void (*error)(char *x)) +STATIC int INIT __decompress(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long out_size, + long *in_used, + void (*error)(char *x)) { - return unxz(buf, len, fill, flush, out_buf, pos, error); + return unxz(in, in_size, fill, flush, out, in_used, error); } #endif diff --git a/lib/devres.c b/lib/devres.c index fe0c63caeb68..73901160197e 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> #include <linux/device.h> -#include <linux/err.h> -#include <linux/io.h> -#include <linux/gfp.h> +#include <linux/errno.h> #include <linux/export.h> +#include <linux/gfp_types.h> +#include <linux/io.h> +#include <linux/ioport.h> #include <linux/of_address.h> +#include <linux/types.h> enum devm_ioremap_type { DEVM_IOREMAP = 0, @@ -112,9 +115,8 @@ EXPORT_SYMBOL(devm_ioremap_wc); */ void devm_iounmap(struct device *dev, void __iomem *addr) { - WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, + WARN_ON(devres_release(dev, devm_ioremap_release, devm_ioremap_match, (__force void *)addr)); - iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); @@ -125,12 +127,13 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, resource_size_t size; void __iomem *dest_ptr; char *pretty_name; + int ret; BUG_ON(!dev); if (!res || resource_type(res) != IORESOURCE_MEM) { - dev_err(dev, "invalid resource %pR\n", res); - return IOMEM_ERR_PTR(-EINVAL); + ret = dev_err_probe(dev, -EINVAL, "invalid resource %pR\n", res); + return IOMEM_ERR_PTR(ret); } if (type == DEVM_IOREMAP && res->flags & IORESOURCE_MEM_NONPOSTED) @@ -144,20 +147,20 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res, else pretty_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL); if (!pretty_name) { - dev_err(dev, "can't generate pretty name for resource %pR\n", res); - return IOMEM_ERR_PTR(-ENOMEM); + ret = dev_err_probe(dev, -ENOMEM, "can't generate pretty name for resource %pR\n", res); + return IOMEM_ERR_PTR(ret); } if (!devm_request_mem_region(dev, res->start, size, pretty_name)) { - dev_err(dev, "can't request region for resource %pR\n", res); - return IOMEM_ERR_PTR(-EBUSY); + ret = dev_err_probe(dev, -EBUSY, "can't request region for resource %pR\n", res); + return IOMEM_ERR_PTR(ret); } dest_ptr = __devm_ioremap(dev, res->start, size, type); if (!dest_ptr) { - dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); - dest_ptr = IOMEM_ERR_PTR(-ENOMEM); + ret = dev_err_probe(dev, -ENOMEM, "ioremap failed for resource %pR\n", res); + return IOMEM_ERR_PTR(ret); } return dest_ptr; @@ -304,8 +307,7 @@ EXPORT_SYMBOL(devm_ioport_map); */ void devm_ioport_unmap(struct device *dev, void __iomem *addr) { - ioport_unmap(addr); - WARN_ON(devres_destroy(dev, devm_ioport_map_release, + WARN_ON(devres_release(dev, devm_ioport_map_release, devm_ioport_map_match, (__force void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); diff --git a/lib/dhry_run.c b/lib/dhry_run.c index e6a279dabf84..4a6d05ce4361 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -83,4 +83,5 @@ static int __init dhry_init(void) module_init(dhry_init); MODULE_AUTHOR("Geert Uytterhoeven 
<geert+renesas@glider.be>"); +MODULE_DESCRIPTION("Dhrystone benchmark test module"); MODULE_LICENSE("GPL"); diff --git a/lib/dim/Makefile b/lib/dim/Makefile index c4cc4026c451..5b9bfaac7ac1 100644 --- a/lib/dim/Makefile +++ b/lib/dim/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_DIMLIB) += dimlib.o -dimlib-objs := dim.o net_dim.o rdma_dim.o +dimlib-y := dim.o net_dim.o rdma_dim.o diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 83b65ac74d73..97c3d084ebf0 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -54,7 +54,8 @@ void dim_park_tired(struct dim *dim) } EXPORT_SYMBOL(dim_park_tired); -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 4e32f7aaac86..d6aa09a979b3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -4,6 +4,7 @@ */ #include <linux/dim.h> +#include <linux/rtnetlink.h> /* * Net DIM profiles: @@ -11,12 +12,6 @@ * There are different set of profiles for RX/TX CQs. * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES */ -#define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 -#define NET_DIM_DEF_PROFILE_CQE 1 -#define NET_DIM_DEF_PROFILE_EQE 1 - #define NET_DIM_RX_EQE_PROFILES { \ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ @@ -101,6 +96,143 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) } EXPORT_SYMBOL(net_dim_get_def_tx_moderation); +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)) +{ + struct dim_cq_moder *rxp = NULL, *txp; + struct dim_irq_moder *moder; + int len; + + dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL); + if (!dev->irq_moder) + return -ENOMEM; + + moder = dev->irq_moder; + len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile); + + moder->coal_flags = coal_flags; + moder->profile_flags = profile_flags; + + if (profile_flags & DIM_PROFILE_RX) { + moder->rx_dim_work = rx_dim_work; + moder->dim_rx_mode = rx_mode; + rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL); + if (!rxp) + goto free_moder; + + rcu_assign_pointer(moder->rx_profile, rxp); + } + + if (profile_flags & DIM_PROFILE_TX) { + moder->tx_dim_work = tx_dim_work; + moder->dim_tx_mode = tx_mode; + txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL); + if (!txp) + goto free_rxp; + + rcu_assign_pointer(moder->tx_profile, txp); + } + + return 0; + +free_rxp: + kfree(rxp); +free_moder: + kfree(moder); + return -ENOMEM; +} +EXPORT_SYMBOL(net_dim_init_irq_moder); + +/* RTNL lock is held. 
*/ +void net_dim_free_irq_moder(struct net_device *dev) +{ + struct dim_cq_moder *rxp, *txp; + + if (!dev->irq_moder) + return; + + rxp = rtnl_dereference(dev->irq_moder->rx_profile); + txp = rtnl_dereference(dev->irq_moder->tx_profile); + + rcu_assign_pointer(dev->irq_moder->rx_profile, NULL); + rcu_assign_pointer(dev->irq_moder->tx_profile, NULL); + + kfree_rcu(rxp, rcu); + kfree_rcu(txp, rcu); + kfree(dev->irq_moder); +} +EXPORT_SYMBOL(net_dim_free_irq_moder); + +void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx) +{ + struct dim_irq_moder *irq_moder = dev->irq_moder; + + if (!irq_moder) + return; + + if (is_tx) { + INIT_WORK(&dim->work, irq_moder->tx_dim_work); + dim->mode = READ_ONCE(irq_moder->dim_tx_mode); + return; + } + + INIT_WORK(&dim->work, irq_moder->rx_dim_work); + dim->mode = READ_ONCE(irq_moder->dim_rx_mode); +} +EXPORT_SYMBOL(net_dim_setting); + +void net_dim_work_cancel(struct dim *dim) +{ + cancel_work_sync(&dim->work); +} +EXPORT_SYMBOL(net_dim_work_cancel); + +struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev, + struct dim *dim) +{ + struct dim_cq_moder res, *profile; + + rcu_read_lock(); + profile = rcu_dereference(dev->irq_moder->rx_profile); + res = profile[dim->profile_ix]; + rcu_read_unlock(); + + res.cq_period_mode = dim->mode; + + return res; +} +EXPORT_SYMBOL(net_dim_get_rx_irq_moder); + +struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev, + struct dim *dim) +{ + struct dim_cq_moder res, *profile; + + rcu_read_lock(); + profile = rcu_dereference(dev->irq_moder->tx_profile); + res = profile[dim->profile_ix]; + rcu_read_unlock(); + + res.cq_period_mode = dim->mode; + + return res; +} +EXPORT_SYMBOL(net_dim_get_tx_irq_moder); + +void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode) +{ + WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode); +} +EXPORT_SYMBOL(net_dim_set_rx_mode); + +void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode) +{ + WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode); +} +EXPORT_SYMBOL(net_dim_set_tx_mode); + static int net_dim_step(struct dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) @@ -215,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) return dim->profile_ix != prev_ix; } -void net_dim(struct dim *dim, struct dim_sample end_sample) +void net_dim(struct dim *dim, const struct dim_sample *end_sample) { struct dim_stats curr_stats; u16 nevents; @@ -223,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) switch (dim->state) { case DIM_MEASURE_IN_PROGRESS: nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, + end_sample->event_ctr, dim->start_sample.event_ctr); if (nevents < DIM_NEVENTS) break; - if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) + if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats)) break; if (net_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; @@ -236,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) } fallthrough; case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); + dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr, + end_sample->byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 222c6d6c8281..388da1aea14a 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -54,20 +54,26 @@ void __init 
dump_stack_set_arch_desc(const char *fmt, ...) */ void dump_stack_print_info(const char *log_lvl) { - printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n", - log_lvl, raw_smp_processor_id(), current->pid, current->comm, + printk("%sCPU: %d UID: %u PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n", + log_lvl, raw_smp_processor_id(), + __kuid_val(current_real_cred()->euid), + current->pid, current->comm, kexec_crash_loaded() ? "Kdump: loaded " : "", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version, BUILD_ID_VAL); + if (get_taint()) + printk("%s%s\n", log_lvl, print_tainted_verbose()); + if (dump_stack_arch_desc_str[0] != '\0') printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); print_worker_info(log_lvl, current); print_stop_info(log_lvl, current); + print_scx_info(log_lvl, current); } /** diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index f2c5e7910bb1..5a007952f7f2 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -1147,7 +1147,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) iter->table->mod_name, dp->function, ddebug_describe_flags(dp->flags, &flags)); seq_escape_str(m, dp->format, ESCAPE_SPACE, "\t\r\n\""); - seq_puts(m, "\""); + seq_putc(m, '"'); if (dp->class_id != _DPRINTK_CLASS_DFLT) { class = ddebug_class_name(iter, dp); @@ -1156,7 +1156,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) else seq_printf(m, " class unknown, _id:%d", dp->class_id); } - seq_puts(m, "\n"); + seq_putc(m, '\n'); return 0; } diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e49deddd3de9..c1b7638a594a 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -179,7 +179,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->adj_limit = limit + completed; dql->prev_ovlimit = ovlimit; - dql->prev_last_obj_cnt = dql->last_obj_cnt; + dql->prev_last_obj_cnt = READ_ONCE(dql->last_obj_cnt); dql->num_completed = completed; dql->prev_num_queued = num_queued; diff --git a/lib/fault-inject.c b/lib/fault-inject.c index d608f9b48c10..999053fa133e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> -#include <linux/random.h> +#include <linux/prandom.h> +#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/stat.h> #include <linux/types.h> @@ -12,6 +13,24 @@ #include <linux/fault-inject.h> /* + * The should_fail() functions use prandom instead of the normal Linux RNG + * since they don't need cryptographically secure random numbers. + */ +static DEFINE_PER_CPU(struct rnd_state, fault_rnd_state); + +static u32 fault_prandom_u32_below_100(void) +{ + struct rnd_state *state; + u32 res; + + state = &get_cpu_var(fault_rnd_state); + res = prandom_u32_state(state); + put_cpu_var(fault_rnd_state); + + return res % 100; +} + +/* * setup_fault_attr() is a helper function for various __setup handlers, so it * returns 0 on error, because that is what __setup handlers do. 
*/ @@ -30,6 +49,8 @@ int setup_fault_attr(struct fault_attr *attr, char *str) return 0; } + prandom_init_once(&fault_rnd_state); + attr->probability = probability; attr->interval = interval; atomic_set(&attr->times, times); @@ -145,7 +166,7 @@ bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) return false; } - if (attr->probability <= get_random_u32_below(100)) + if (attr->probability <= fault_prandom_u32_below_100()) return false; fail: @@ -218,6 +239,8 @@ struct dentry *fault_create_debugfs_attr(const char *name, if (IS_ERR(dir)) return dir; + prandom_init_once(&fault_rnd_state); + debugfs_create_ul("probability", mode, dir, &attr->probability); debugfs_create_ul("interval", mode, dir, &attr->interval); debugfs_create_atomic_t("times", mode, dir, &attr->times); @@ -430,6 +453,8 @@ static const struct config_item_type fault_config_type = { void fault_config_init(struct fault_config *config, const char *name) { + prandom_init_once(&fault_rnd_state); + config_group_init_type_name(&config->group, name, &fault_config_type); } EXPORT_SYMBOL_GPL(fault_config_init); diff --git a/lib/find_bit.c b/lib/find_bit.c index dacadd904250..0836bb3d76c5 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -87,7 +87,7 @@ out: \ if (sz % BITS_PER_LONG) \ tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz); \ found: \ - sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz); \ + sz = idx * BITS_PER_LONG + fns(tmp, nr); \ out: \ sz; \ }) diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index d3fb09e6eff1..402e160e7186 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -194,4 +194,5 @@ static int __init find_bit_test(void) } module_init(find_bit_test); +MODULE_DESCRIPTION("Test for find_*_bit functions"); MODULE_LICENSE("GPL"); diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig index 7e945fdcbf11..ae59b5b4e225 100644 --- a/lib/fonts/Kconfig +++ b/lib/fonts/Kconfig @@ -10,7 +10,7 @@ if FONT_SUPPORT config FONTS bool "Select compiled-in fonts" - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE || DRM_PANIC || DRM_CLIENT_LOG help Say Y here if you would like to use fonts other than the default your frame buffer console usually use. @@ -23,7 +23,7 @@ config FONTS config FONT_8x8 bool "VGA 8x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE || DRM_PANIC default y if !SPARC && !FONTS help This is the "high resolution" font for the VGA frame buffer (the one @@ -46,7 +46,7 @@ config FONT_8x16 config FONT_6x11 bool "Mac console 6x11 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE || DRM_PANIC default y if !SPARC && !FONTS && MAC help Small console font with Macintosh-style high-half glyphs. Some Mac @@ -54,7 +54,7 @@ config FONT_6x11 config FONT_7x14 bool "console 7x14 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC help Console font with characters just a bit smaller than the default. If the standard 8x16 font is a little too big for you, say Y. 
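The lib/fault-inject.c hunks above switch should_fail()'s probability draw from the crypto-grade RNG to a per-CPU prandom state, initialised lazily via prandom_init_once() at each setup entry point. A minimal usage sketch follows, assuming a hypothetical subsystem: the attribute name, boot parameter and allocator wrapper are invented for illustration, while setup_fault_attr(), should_fail() and DECLARE_FAULT_ATTR() are the existing fault-injection API.

#include <linux/fault-inject.h>
#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical fault attribute; the name and boot parameter are illustrative. */
static DECLARE_FAULT_ATTR(fail_my_alloc);

static int __init setup_fail_my_alloc(char *str)
{
	/* e.g. fail_my_alloc=<interval>,<probability>,<space>,<times> */
	return setup_fault_attr(&fail_my_alloc, str);
}
__setup("fail_my_alloc=", setup_fail_my_alloc);

static void *my_alloc(size_t size, gfp_t gfp)
{
	if (should_fail(&fail_my_alloc, size))
		return NULL;	/* injected failure */
	return kmalloc(size, gfp);
}

Consumers like this are unaffected by the change: whether the percentage check behind should_fail() is served by the new fault_prandom_u32_below_100() or the previous get_random_u32_below(100) only changes the entropy source, not the interface.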
@@ -62,7 +62,7 @@ config FONT_7x14 config FONT_PEARL_8x8 bool "Pearl (old m68k) console 8x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC default y if !SPARC && !FONTS && AMIGA help Small console font with PC-style control-character and high-half @@ -70,7 +70,7 @@ config FONT_PEARL_8x8 config FONT_ACORN_8x8 bool "Acorn console 8x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC default y if !SPARC && !FONTS && ARM && ARCH_ACORN help Small console font with PC-style control characters and high-half @@ -90,7 +90,7 @@ config FONT_6x10 config FONT_10x18 bool "console 10x18 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC help This is a high resolution console font for machines with very big letters. It fits between the sun 12x22 and the normal 8x16 font. @@ -105,7 +105,8 @@ config FONT_SUN8x16 config FONT_SUN12x22 bool "Sparc console 12x22 font (not supported by all drivers)" - depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC) + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC + depends on !SPARC && FONTS help This is the high resolution console font for Sun machines with very big letters (like the letters used in the SPARC PROM). If the @@ -113,7 +114,8 @@ config FONT_SUN12x22 config FONT_TER16x32 bool "Terminus 16x32 font (not supported by all drivers)" - depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC) + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC + depends on !SPARC && FONTS || SPARC help Terminus Font is a clean, fixed width bitmap font, designed for long (8 and more hours per day) work with computers. @@ -122,7 +124,7 @@ config FONT_TER16x32 config FONT_6x8 bool "OLED 6x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || DRM_PANIC help This font is useful for small displays (OLED). 
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index d2377e00caab..ecb638d4cde1 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -234,11 +234,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \ checker(expected_size, \ kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ kfree(p)); \ - checker(expected_size, __kmalloc(alloc_size, gfp), \ - kfree(p)); \ - checker(expected_size, \ - __kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ - kfree(p)); \ \ orig = kmalloc(alloc_size, gfp); \ KUNIT_EXPECT_TRUE(test, orig != NULL); \ @@ -311,8 +306,7 @@ DEFINE_ALLOC_SIZE_TEST_PAIR(vmalloc) orig = kvmalloc(prev_size, gfp); \ KUNIT_EXPECT_TRUE(test, orig != NULL); \ checker(((expected_pages) * PAGE_SIZE) * 2, \ - kvrealloc(orig, prev_size, \ - ((alloc_pages) * PAGE_SIZE) * 2, gfp), \ + kvrealloc(orig, ((alloc_pages) * PAGE_SIZE) * 2, gfp), \ kvfree(p)); \ } while (0) DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc) @@ -377,7 +371,7 @@ static const char * const test_strs[] = { for (i = 0; i < ARRAY_SIZE(test_strs); i++) { \ len = strlen(test_strs[i]); \ KUNIT_EXPECT_EQ(test, __builtin_constant_p(len), 0); \ - checker(len, kmemdup_array(test_strs[i], len, 1, gfp), \ + checker(len, kmemdup_array(test_strs[i], 1, len, gfp), \ kfree(p)); \ checker(len, kmemdup(test_strs[i], len, gfp), \ kfree(p)); \ @@ -913,10 +907,9 @@ static void fortify_test_##memfunc(struct kunit *test) \ memfunc(zero.buf, srcB, 0 + unconst); \ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0); \ KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0); \ - /* We currently explicitly ignore zero-sized dests. */ \ memfunc(zero.buf, srcB, 1 + unconst); \ KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0); \ - KUNIT_EXPECT_EQ(test, fortify_write_overflows, 0); \ + KUNIT_EXPECT_EQ(test, fortify_write_overflows, 1); \ } __fortify_test(memcpy) __fortify_test(memmove) @@ -990,7 +983,7 @@ static void fortify_test_memcmp(struct kunit *test) KUNIT_ASSERT_EQ(test, memcmp(one, two, one_len), 0); KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0); /* Still in bounds, but no longer matching. */ - KUNIT_ASSERT_EQ(test, memcmp(one, two, one_len + 1), -32); + KUNIT_ASSERT_LT(test, memcmp(one, two, one_len + 1), 0); KUNIT_EXPECT_EQ(test, fortify_read_overflows, 0); /* Catch too-large ranges. 
*/ @@ -1099,4 +1092,5 @@ static struct kunit_suite fortify_test_suite = { kunit_test_suite(fortify_test_suite); +MODULE_DESCRIPTION("Runtime test cases for CONFIG_FORTIFY_SOURCE"); MODULE_LICENSE("GPL"); diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index f755b997b967..6d03425b849e 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -2,30 +2,11 @@ #include <stdio.h> #include "../include/linux/crc32poly.h" #include "../include/generated/autoconf.h" -#include "crc32defs.h" #include <inttypes.h> -#define ENTRIES_PER_LINE 4 - -#if CRC_LE_BITS > 8 -# define LE_TABLE_ROWS (CRC_LE_BITS/8) -# define LE_TABLE_SIZE 256 -#else -# define LE_TABLE_ROWS 1 -# define LE_TABLE_SIZE (1 << CRC_LE_BITS) -#endif - -#if CRC_BE_BITS > 8 -# define BE_TABLE_ROWS (CRC_BE_BITS/8) -# define BE_TABLE_SIZE 256 -#else -# define BE_TABLE_ROWS 1 -# define BE_TABLE_SIZE (1 << CRC_BE_BITS) -#endif - -static uint32_t crc32table_le[LE_TABLE_ROWS][256]; -static uint32_t crc32table_be[BE_TABLE_ROWS][256]; -static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; +static uint32_t crc32table_le[256]; +static uint32_t crc32table_be[256]; +static uint32_t crc32ctable_le[256]; /** * crc32init_le() - allocate and initialize LE table data @@ -34,25 +15,17 @@ static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; * fact that crctable[i^j] = crctable[i] ^ crctable[j]. * */ -static void crc32init_le_generic(const uint32_t polynomial, - uint32_t (*tab)[256]) +static void crc32init_le_generic(const uint32_t polynomial, uint32_t tab[256]) { unsigned i, j; uint32_t crc = 1; - tab[0][0] = 0; + tab[0] = 0; - for (i = LE_TABLE_SIZE >> 1; i; i >>= 1) { + for (i = 128; i; i >>= 1) { crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); - for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - tab[0][i + j] = crc ^ tab[0][j]; - } - for (i = 0; i < LE_TABLE_SIZE; i++) { - crc = tab[0][i]; - for (j = 1; j < LE_TABLE_ROWS; j++) { - crc = tab[0][crc & 0xff] ^ (crc >> 8); - tab[j][i] = crc; - } + for (j = 0; j < 256; j += 2 * i) + tab[i + j] = crc ^ tab[j]; } } @@ -74,34 +47,22 @@ static void crc32init_be(void) unsigned i, j; uint32_t crc = 0x80000000; - crc32table_be[0][0] = 0; + crc32table_be[0] = 0; - for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { + for (i = 1; i < 256; i <<= 1) { crc = (crc << 1) ^ ((crc & 0x80000000) ? 
CRC32_POLY_BE : 0); for (j = 0; j < i; j++) - crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; - } - for (i = 0; i < BE_TABLE_SIZE; i++) { - crc = crc32table_be[0][i]; - for (j = 1; j < BE_TABLE_ROWS; j++) { - crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); - crc32table_be[j][i] = crc; - } + crc32table_be[i + j] = crc ^ crc32table_be[j]; } } -static void output_table(uint32_t (*table)[256], int rows, int len, char *trans) +static void output_table(const uint32_t table[256]) { - int i, j; - - for (j = 0 ; j < rows; j++) { - printf("{"); - for (i = 0; i < len - 1; i++) { - if (i % ENTRIES_PER_LINE == 0) - printf("\n"); - printf("%s(0x%8.8xL), ", trans, table[j][i]); - } - printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); + int i; + + for (i = 0; i < 256; i += 4) { + printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n", + table[i], table[i + 1], table[i + 2], table[i + 3]); } } @@ -109,34 +70,20 @@ int main(int argc, char** argv) { printf("/* this file is generated - do not edit */\n\n"); - if (CRC_LE_BITS > 1) { - crc32init_le(); - printf("static const u32 ____cacheline_aligned " - "crc32table_le[%d][%d] = {", - LE_TABLE_ROWS, LE_TABLE_SIZE); - output_table(crc32table_le, LE_TABLE_ROWS, - LE_TABLE_SIZE, "tole"); - printf("};\n"); - } + crc32init_le(); + printf("static const u32 ____cacheline_aligned crc32table_le[256] = {\n"); + output_table(crc32table_le); + printf("};\n"); - if (CRC_BE_BITS > 1) { - crc32init_be(); - printf("static const u32 ____cacheline_aligned " - "crc32table_be[%d][%d] = {", - BE_TABLE_ROWS, BE_TABLE_SIZE); - output_table(crc32table_be, LE_TABLE_ROWS, - BE_TABLE_SIZE, "tobe"); - printf("};\n"); - } - if (CRC_LE_BITS > 1) { - crc32cinit_le(); - printf("static const u32 ____cacheline_aligned " - "crc32ctable_le[%d][%d] = {", - LE_TABLE_ROWS, LE_TABLE_SIZE); - output_table(crc32ctable_le, LE_TABLE_ROWS, - LE_TABLE_SIZE, "tole"); - printf("};\n"); - } + crc32init_be(); + printf("static const u32 ____cacheline_aligned crc32table_be[256] = {\n"); + output_table(crc32table_be); + printf("};\n"); + + crc32cinit_le(); + printf("static const u32 ____cacheline_aligned crc32ctable_le[256] = {\n"); + output_table(crc32ctable_le); + printf("};\n"); return 0; } diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8..79e067b51488 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -5,99 +5,31 @@ #include <linux/gfp.h> #include <linux/kmemleak.h> -#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) -#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) - -struct genradix_node { - union { - /* Interior node: */ - struct genradix_node *children[GENRADIX_ARY]; - - /* Leaf: */ - u8 data[GENRADIX_NODE_SIZE]; - }; -}; - -static inline int genradix_depth_shift(unsigned depth) -{ - return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; -} - -/* - * Returns size (of data, in bytes) that a tree of a given depth holds: - */ -static inline size_t genradix_depth_size(unsigned depth) -{ - return 1UL << genradix_depth_shift(depth); -} - -/* depth that's needed for a genradix that can address up to ULONG_MAX: */ -#define GENRADIX_MAX_DEPTH \ - DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) - -#define GENRADIX_DEPTH_MASK \ - ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) - -static inline unsigned genradix_root_to_depth(struct genradix_root *r) -{ - return (unsigned long) r & GENRADIX_DEPTH_MASK; -} - -static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) -{ 
- return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); -} - /* * Returns pointer to the specified byte @offset within @radix, or NULL if not * allocated */ void *__genradix_ptr(struct __genradix *radix, size_t offset) { - struct genradix_root *r = READ_ONCE(radix->root); - struct genradix_node *n = genradix_root_to_node(r); - unsigned level = genradix_root_to_depth(r); - - if (ilog2(offset) >= genradix_depth_shift(level)) - return NULL; - - while (1) { - if (!n) - return NULL; - if (!level) - break; - - level--; - - n = n->children[offset >> genradix_depth_shift(level)]; - offset &= genradix_depth_size(level) - 1; - } - - return &n->data[offset]; + return __genradix_ptr_inlined(radix, offset); } EXPORT_SYMBOL(__genradix_ptr); -static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) -{ - return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); -} - -static inline void genradix_free_node(struct genradix_node *node) -{ - kfree(node); -} - /* * Returns pointer to the specified byte @offset within @radix, allocating it if * necessary - newly allocated slots are always zeroed out: */ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, + struct genradix_node **preallocated, gfp_t gfp_mask) { struct genradix_root *v = READ_ONCE(radix->root); struct genradix_node *n, *new_node = NULL; unsigned level; + if (preallocated) + swap(new_node, *preallocated); + /* Increase tree depth if necessary: */ while (1) { struct genradix_root *r = v, *new_root; @@ -121,6 +53,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } @@ -279,7 +213,7 @@ int __genradix_prealloc(struct __genradix *radix, size_t size, size_t offset; for (offset = 0; offset < size; offset += GENRADIX_NODE_SIZE) - if (!__genradix_ptr_alloc(radix, offset, gfp_mask)) + if (!__genradix_ptr_alloc(radix, offset, NULL, gfp_mask)) return -ENOMEM; return 0; diff --git a/lib/glob.c b/lib/glob.c index 15b73f490720..aa57900d2062 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -68,6 +68,8 @@ bool __pure glob_match(char const *pat, char const *str) back_str = --str; /* Allow zero-length match */ break; case '[': { /* Character class */ + if (c == '\0') /* No possible match */ + return false; bool match = false, inverted = (*pat == '!'); char const *class = pat + inverted; unsigned char a = *class++; diff --git a/lib/hashtable_test.c b/lib/hashtable_test.c index 1d1b3288dee2..3521de6bad15 100644 --- a/lib/hashtable_test.c +++ b/lib/hashtable_test.c @@ -314,4 +314,5 @@ static struct kunit_suite hashtable_test_module = { kunit_test_suites(&hashtable_test_module); +MODULE_DESCRIPTION("KUnit test for the Kernel Hashtable structures"); MODULE_LICENSE("GPL"); diff --git a/lib/hexdump.c b/lib/hexdump.c index 06833d404398..c3db7c3a7643 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -9,7 +9,7 @@ #include <linux/kernel.h> #include <linux/minmax.h> #include <linux/export.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); diff --git a/lib/inflate.c b/lib/inflate.c index fbaf03c1748d..eab886baa1b4 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -1257,8 +1257,6 @@ static int INIT gunzip(void) /* Decompress */ if ((res = inflate())) { switch (res) { - case 0: - break; case 1: error("invalid compressed format (err=1)"); break; diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 
f37f4d44faa9..837064b83a6c 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/interval_tree.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c index 5de7c04e05ef..2fd5712fb7c0 100644 --- a/lib/iomap_copy.c +++ b/lib/iomap_copy.c @@ -16,9 +16,8 @@ * time. Order of access is not guaranteed, nor is a memory barrier * performed afterwards. */ -void __attribute__((weak)) __iowrite32_copy(void __iomem *to, - const void *from, - size_t count) +#ifndef __iowrite32_copy +void __iowrite32_copy(void __iomem *to, const void *from, size_t count) { u32 __iomem *dst = to; const u32 *src = from; @@ -28,6 +27,7 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to, __raw_writel(*src++, dst++); } EXPORT_SYMBOL_GPL(__iowrite32_copy); +#endif /** * __ioread32_copy - copy data from MMIO space, in 32-bit units @@ -60,9 +60,8 @@ EXPORT_SYMBOL_GPL(__ioread32_copy); * time. Order of access is not guaranteed, nor is a memory barrier * performed afterwards. */ -void __attribute__((weak)) __iowrite64_copy(void __iomem *to, - const void *from, - size_t count) +#ifndef __iowrite64_copy +void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { #ifdef CONFIG_64BIT u64 __iomem *dst = to; @@ -75,5 +74,5 @@ void __attribute__((weak)) __iowrite64_copy(void __iomem *to, __iowrite32_copy(to, from, count * 2); #endif } - EXPORT_SYMBOL_GPL(__iowrite64_copy); +#endif diff --git a/lib/iomem_copy.c b/lib/iomem_copy.c new file mode 100644 index 000000000000..dec7eaea60e0 --- /dev/null +++ b/lib/iomem_copy.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Kalray, Inc. All Rights Reserved. + */ + +#include <linux/align.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/types.h> +#include <linux/unaligned.h> + +#ifndef memset_io +/** + * memset_io() - Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ +void memset_io(volatile void __iomem *addr, int val, size_t count) +{ + long qc = (u8)val; + + qc *= ~0UL / 0xff; + + while (count && !IS_ALIGNED((long)addr, sizeof(long))) { + __raw_writeb(val, addr); + addr++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + __raw_writeq(qc, addr); +#else + __raw_writel(qc, addr); +#endif + + addr += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(val, addr); + addr++; + count--; + } +} +EXPORT_SYMBOL(memset_io); +#endif + +#ifndef memcpy_fromio +/** + * memcpy_fromio() - Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. 
+ */ +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) +{ + while (count && !IS_ALIGNED((long)src, sizeof(long))) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { +#ifdef CONFIG_64BIT + long val = __raw_readq(src); +#else + long val = __raw_readl(src); +#endif + put_unaligned(val, (long *)dst); + + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + *(u8 *)dst = __raw_readb(src); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_fromio); +#endif + +#ifndef memcpy_toio +/** + * memcpy_toio() -Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. + */ +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) +{ + while (count && !IS_ALIGNED((long)dst, sizeof(long))) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } + + while (count >= sizeof(long)) { + long val = get_unaligned((long *)src); +#ifdef CONFIG_64BIT + __raw_writeq(val, dst); +#else + __raw_writel(val, dst); +#endif + + src += sizeof(long); + dst += sizeof(long); + count -= sizeof(long); + } + + while (count) { + __raw_writeb(*(u8 *)src, dst); + src++; + dst++; + count--; + } +} +EXPORT_SYMBOL(memcpy_toio); +#endif + + diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4a6a9f419bd7..9ec806f989f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t n, copied = 0; + bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || + PageHighMem(page); if (!page_copy_sane(page, offset, bytes)) return 0; @@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, char *p; n = bytes - copied; - if (PageHighMem(page)) { + if (uses_kmap) { page += offset / PAGE_SIZE; offset %= PAGE_SIZE; n = min_t(size_t, n, PAGE_SIZE - offset); @@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, kunmap_atomic(p); copied += n; offset += n; - } while (PageHighMem(page) && copied != bytes && n > 0); + } while (uses_kmap && copied != bytes && n > 0); return copied; } @@ -527,6 +529,39 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) i->__iov = iov; } +static void iov_iter_folioq_advance(struct iov_iter *i, size_t size) +{ + const struct folio_queue *folioq = i->folioq; + unsigned int slot = i->folioq_slot; + + if (!i->count) + return; + i->count -= size; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + } + + size += i->iov_offset; /* From beginning of current segment. 
*/ + do { + size_t fsize = folioq_folio_size(folioq, slot); + + if (likely(size < fsize)) + break; + size -= fsize; + slot++; + if (slot >= folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } while (size); + + i->iov_offset = size; + i->folioq_slot = slot; + i->folioq = folioq; +} + void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) @@ -539,12 +574,40 @@ void iov_iter_advance(struct iov_iter *i, size_t size) iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); + } else if (iov_iter_is_folioq(i)) { + iov_iter_folioq_advance(i, size); } else if (iov_iter_is_discard(i)) { i->count -= size; } } EXPORT_SYMBOL(iov_iter_advance); +static void iov_iter_folioq_revert(struct iov_iter *i, size_t unroll) +{ + const struct folio_queue *folioq = i->folioq; + unsigned int slot = i->folioq_slot; + + for (;;) { + size_t fsize; + + if (slot == 0) { + folioq = folioq->prev; + slot = folioq_nr_slots(folioq); + } + slot--; + + fsize = folioq_folio_size(folioq, slot); + if (unroll <= fsize) { + i->iov_offset = fsize - unroll; + break; + } + unroll -= fsize; + } + + i->folioq_slot = slot; + i->folioq = folioq; +} + void iov_iter_revert(struct iov_iter *i, size_t unroll) { if (!unroll) @@ -576,6 +639,9 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) } unroll -= n; } + } else if (iov_iter_is_folioq(i)) { + i->iov_offset = 0; + iov_iter_folioq_revert(i, unroll); } else { /* same logics for iovec and kvec */ const struct iovec *iov = iter_iov(i); while (1) { @@ -603,6 +669,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); } + if (unlikely(iov_iter_is_folioq(i))) + return !i->count ? 0 : + umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count); return i->count; } EXPORT_SYMBOL(iov_iter_single_seg_count); @@ -640,6 +709,36 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, EXPORT_SYMBOL(iov_iter_bvec); /** + * iov_iter_folio_queue - Initialise an I/O iterator to use the folios in a folio queue + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @folioq: The starting point in the folio queue. + * @first_slot: The first slot in the folio queue to use + * @offset: The offset into the folio in the first slot to start at + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator to either draw data out of the pages attached to an + * inode or to inject data into those pages. The pages *must* be prevented + * from evaporation, either by taking a ref on them or locking them by the + * caller. + */ +void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, unsigned int first_slot, + unsigned int offset, size_t count) +{ + BUG_ON(direction & ~1); + *i = (struct iov_iter) { + .iter_type = ITER_FOLIOQ, + .data_source = direction, + .folioq = folioq, + .folioq_slot = first_slot, + .count = count, + .iov_offset = offset, + }; +} +EXPORT_SYMBOL(iov_iter_folio_queue); + +/** * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray * @i: The iterator to initialise. * @direction: The direction of the transfer. @@ -765,12 +864,19 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, if (iov_iter_is_bvec(i)) return iov_iter_aligned_bvec(i, addr_mask, len_mask); + /* With both xarray and folioq types, we're dealing with whole folios. 
*/ if (iov_iter_is_xarray(i)) { if (i->count & len_mask) return false; if ((i->xarray_start + i->iov_offset) & addr_mask) return false; } + if (iov_iter_is_folioq(i)) { + if (i->count & len_mask) + return false; + if (i->iov_offset & addr_mask) + return false; + } return true; } @@ -835,6 +941,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) if (iov_iter_is_bvec(i)) return iov_iter_alignment_bvec(i); + /* With both xarray and folioq types, we're dealing with whole folios. */ + if (iov_iter_is_folioq(i)) + return i->iov_offset | i->count; if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; @@ -887,6 +996,65 @@ static int want_pages_array(struct page ***res, size_t size, return count; } +static ssize_t iter_folioq_get_pages(struct iov_iter *iter, + struct page ***ppages, size_t maxsize, + unsigned maxpages, size_t *_start_offset) +{ + const struct folio_queue *folioq = iter->folioq; + struct page **pages; + unsigned int slot = iter->folioq_slot; + size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + if (WARN_ON(iov_offset != 0)) + return -EIO; + } + + maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages); + if (!maxpages) + return -ENOMEM; + *_start_offset = iov_offset & ~PAGE_MASK; + pages = *ppages; + + for (;;) { + struct folio *folio = folioq_folio(folioq, slot); + size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot); + size_t part = PAGE_SIZE - offset % PAGE_SIZE; + + if (offset < fsize) { + part = umin(part, umin(maxsize - extracted, fsize - offset)); + count -= part; + iov_offset += part; + extracted += part; + + *pages = folio_page(folio, offset / PAGE_SIZE); + get_page(*pages); + pages++; + maxpages--; + } + + if (maxpages == 0 || extracted >= maxsize) + break; + + if (iov_offset >= fsize) { + iov_offset = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + } + + iter->count = count; + iter->iov_offset = iov_offset; + iter->folioq = folioq; + iter->folioq_slot = slot; + return extracted; +} + static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, pgoff_t index, unsigned int nr_pages) { @@ -1034,6 +1202,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, } return maxsize; } + if (iov_iter_is_folioq(i)) + return iter_folioq_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; @@ -1118,6 +1288,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return iov_npages(i, maxpages); if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); + if (iov_iter_is_folioq(i)) { + unsigned offset = i->iov_offset % PAGE_SIZE; + int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); + return min(npages, maxpages); + } if (iov_iter_is_xarray(i)) { unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); @@ -1399,6 +1574,68 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) } /* + * Extract a list of contiguous pages from an ITER_FOLIOQ iterator. This does + * not get references on the pages, nor does it get a pin on them. 
+ */ +static ssize_t iov_iter_extract_folioq_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + const struct folio_queue *folioq = i->folioq; + struct page **p; + unsigned int nr = 0; + size_t extracted = 0, offset, slot = i->folioq_slot; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + slot = 0; + if (WARN_ON(i->iov_offset != 0)) + return -EIO; + } + + offset = i->iov_offset & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + for (;;) { + struct folio *folio = folioq_folio(folioq, slot); + size_t offset = i->iov_offset, fsize = folioq_folio_size(folioq, slot); + size_t part = PAGE_SIZE - offset % PAGE_SIZE; + + if (offset < fsize) { + part = umin(part, umin(maxsize - extracted, fsize - offset)); + i->count -= part; + i->iov_offset += part; + extracted += part; + + p[nr++] = folio_page(folio, offset / PAGE_SIZE); + } + + if (nr >= maxpages || extracted >= maxsize) + break; + + if (i->iov_offset >= fsize) { + i->iov_offset = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + } + + i->folioq = folioq; + i->folioq_slot = slot; + return extracted; +} + +/* * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not * get references on the pages, nor does it get a pin on them. */ @@ -1445,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, } /* - * Extract a list of contiguous pages from an ITER_BVEC iterator. This does - * not get references on the pages, nor does it get a pin on them. + * Extract a list of virtually contiguous pages from an ITER_BVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1454,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, iov_iter_extraction_t extraction_flags, size_t *offset0) { - struct page **p, *page; - size_t skip = i->iov_offset, offset, size; - int k; + size_t skip = i->iov_offset, size = 0; + struct bvec_iter bi; + int k = 0; - for (;;) { - if (i->nr_segs == 0) - return 0; - size = min(maxsize, i->bvec->bv_len - skip); - if (size) - break; + if (i->nr_segs == 0) + return 0; + + if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } + bi.bi_idx = 0; + bi.bi_size = maxsize; + bi.bi_bvec_done = skip; + + maxpages = want_pages_array(pages, maxsize, skip, maxpages); + + while (bi.bi_size && bi.bi_idx < i->nr_segs) { + struct bio_vec bv = bvec_iter_bvec(i->bvec, bi); + + /* + * The iov_iter_extract_pages interface only allows an offset + * into the first page. Break out of the loop if we see an + * offset into subsequent pages, the caller will have to call + * iov_iter_extract_pages again for the reminder. 
+ */ + if (k) { + if (bv.bv_offset) + break; + } else { + *offset0 = bv.bv_offset; + } - skip += i->bvec->bv_offset; - page = i->bvec->bv_page + skip / PAGE_SIZE; - offset = skip % PAGE_SIZE; - *offset0 = offset; + (*pages)[k++] = bv.bv_page; + size += bv.bv_len; - maxpages = want_pages_array(pages, size, offset, maxpages); - if (!maxpages) - return -ENOMEM; - p = *pages; - for (k = 0; k < maxpages; k++) - p[k] = page + k; + if (k >= maxpages) + break; + + /* + * We are done when the end of the bvec doesn't align to a page + * boundary as that would create a hole in the returned space. + * The caller will handle this with another call to + * iov_iter_extract_pages. + */ + if (bv.bv_offset + bv.bv_len != PAGE_SIZE) + break; + + bvec_iter_advance_single(i->bvec, &bi, bv.bv_len); + } - size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } @@ -1618,8 +1879,8 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, * added to the pages, but refs will not be taken. * iov_iter_extract_will_pin() will return true. * - * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are - * merely listed; no extra refs or pins are obtained. + * (*) If the iterator is ITER_KVEC, ITER_BVEC, ITER_FOLIOQ or ITER_XARRAY, the + * pages are merely listed; no extra refs or pins are obtained. * iov_iter_extract_will_pin() will return 0. * * Note also: @@ -1654,6 +1915,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, return iov_iter_extract_bvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); + if (iov_iter_is_folioq(i)) + return iov_iter_extract_folioq_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); if (iov_iter_is_xarray(i)) return iov_iter_extract_xarray_pages(i, pages, maxsize, maxpages, extraction_flags, diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2d5329a42105..08b242bbdbdf 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) } EXPORT_SYMBOL(irq_poll_complete); -static void __latent_entropy irq_poll_softirq(struct softirq_action *h) +static void __latent_entropy irq_poll_softirq(void) { struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); int rearm = 0, budget = irq_poll_budget; diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c index 0a7f6ae62839..88adbe813f3a 100644 --- a/lib/is_signed_type_kunit.c +++ b/lib/is_signed_type_kunit.c @@ -46,4 +46,5 @@ static struct kunit_suite is_signed_type_test_suite = { kunit_test_suite(is_signed_type_test_suite); +MODULE_DESCRIPTION("is_signed_type() KUnit test suite"); MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/kfifo.c b/lib/kfifo.c index 12f5a347aa13..a8b2eed90599 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -5,13 +5,14 @@ * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net> */ -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/slab.h> +#include <linux/dma-mapping.h> #include <linux/err.h> +#include <linux/export.h> +#include <linux/kfifo.h> #include <linux/log2.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> #include <linux/uaccess.h> -#include <linux/kfifo.h> /* * internal helper to calculate the unused elements in a fifo @@ -163,6 +164,19 @@ unsigned int __kfifo_out_peek(struct __kfifo *fifo, } EXPORT_SYMBOL(__kfifo_out_peek); +unsigned int __kfifo_out_linear(struct __kfifo *fifo, + unsigned int *tail, unsigned int n) +{ + unsigned int size = fifo->mask + 1; + unsigned int off = fifo->out & fifo->mask; + + if (tail) + *tail 
= off; + + return min3(n, fifo->in - fifo->out, size - off); +} +EXPORT_SYMBOL(__kfifo_out_linear); + unsigned int __kfifo_out(struct __kfifo *fifo, void *buf, unsigned int len) { @@ -292,51 +306,31 @@ int __kfifo_to_user(struct __kfifo *fifo, void __user *to, } EXPORT_SYMBOL(__kfifo_to_user); -static int setup_sgl_buf(struct scatterlist *sgl, void *buf, - int nents, unsigned int len) +static unsigned int setup_sgl_buf(struct __kfifo *fifo, struct scatterlist *sgl, + unsigned int data_offset, int nents, + unsigned int len, dma_addr_t dma) { - int n; - unsigned int l; - unsigned int off; - struct page *page; + const void *buf = fifo->data + data_offset; - if (!nents) + if (!nents || !len) return 0; - if (!len) - return 0; + sg_set_buf(sgl, buf, len); - n = 0; - page = virt_to_page(buf); - off = offset_in_page(buf); - l = 0; - - while (len >= l + PAGE_SIZE - off) { - struct page *npage; - - l += PAGE_SIZE; - buf += PAGE_SIZE; - npage = virt_to_page(buf); - if (page_to_phys(page) != page_to_phys(npage) - l) { - sg_set_page(sgl, page, l - off, off); - sgl = sg_next(sgl); - if (++n == nents || sgl == NULL) - return n; - page = npage; - len -= l - off; - l = off = 0; - } + if (dma != DMA_MAPPING_ERROR) { + sg_dma_address(sgl) = dma + data_offset; + sg_dma_len(sgl) = len; } - sg_set_page(sgl, page, len, off); - return n + 1; + + return 1; } static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, - int nents, unsigned int len, unsigned int off) + int nents, unsigned int len, unsigned int off, dma_addr_t dma) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; - unsigned int l; + unsigned int len_to_end; unsigned int n; off &= fifo->mask; @@ -345,16 +339,17 @@ static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, size *= esize; len *= esize; } - l = min(len, size - off); + len_to_end = min(len, size - off); - n = setup_sgl_buf(sgl, fifo->data + off, nents, l); - n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); + n = setup_sgl_buf(fifo, sgl, off, nents, len_to_end, dma); + n += setup_sgl_buf(fifo, sgl + n, 0, nents - n, len - len_to_end, dma); return n; } unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len) + struct scatterlist *sgl, int nents, unsigned int len, + dma_addr_t dma) { unsigned int l; @@ -362,12 +357,13 @@ unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, if (len > l) len = l; - return setup_sgl(fifo, sgl, nents, len, fifo->in); + return setup_sgl(fifo, sgl, nents, len, fifo->in, dma); } EXPORT_SYMBOL(__kfifo_dma_in_prepare); unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len) + struct scatterlist *sgl, int nents, unsigned int len, + dma_addr_t dma) { unsigned int l; @@ -375,7 +371,7 @@ unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, if (len > l) len = l; - return setup_sgl(fifo, sgl, nents, len, fifo->out); + return setup_sgl(fifo, sgl, nents, len, fifo->out, dma); } EXPORT_SYMBOL(__kfifo_dma_out_prepare); @@ -473,6 +469,19 @@ unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, } EXPORT_SYMBOL(__kfifo_out_peek_r); +unsigned int __kfifo_out_linear_r(struct __kfifo *fifo, + unsigned int *tail, unsigned int n, size_t recsize) +{ + if (fifo->in == fifo->out) + return 0; + + if (tail) + *tail = fifo->out + recsize; + + return min(n, __kfifo_peek_n(fifo, recsize)); +} +EXPORT_SYMBOL(__kfifo_out_linear_r); + unsigned int __kfifo_out_r(struct __kfifo *fifo, void 
*buf, unsigned int len, size_t recsize) { @@ -546,7 +555,8 @@ int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, EXPORT_SYMBOL(__kfifo_to_user_r); unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, + dma_addr_t dma) { BUG_ON(!nents); @@ -555,7 +565,7 @@ unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, if (len + recsize > kfifo_unused(fifo)) return 0; - return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize); + return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize, dma); } EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); @@ -569,7 +579,8 @@ void __kfifo_dma_in_finish_r(struct __kfifo *fifo, EXPORT_SYMBOL(__kfifo_dma_in_finish_r); unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, + dma_addr_t dma) { BUG_ON(!nents); @@ -578,15 +589,7 @@ unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, if (len + recsize > fifo->in - fifo->out) return 0; - return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize); + return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize, dma); } EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); -void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize) -{ - unsigned int len; - - len = __kfifo_peek_n(fifo, recsize); - fifo->out += len + recsize; -} -EXPORT_SYMBOL(__kfifo_dma_out_finish_r); diff --git a/lib/kobject.c b/lib/kobject.c index 72fa20f405f1..abe5f5b856ce 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1096,30 +1096,6 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) -{ - const void *ns = NULL; - - spin_lock(&kobj_ns_type_lock); - if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); - spin_unlock(&kobj_ns_type_lock); - - return ns; -} - -const void *kobj_ns_initial(enum kobj_ns_type type) -{ - const void *ns = NULL; - - spin_lock(&kobj_ns_type_lock); - if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->initial_ns(); - spin_unlock(&kobj_ns_type_lock); - - return ns; -} - void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 03b427e2707e..b7f2fa08d9c8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -433,8 +433,23 @@ static void zap_modalias_env(struct kobj_uevent_env *env) len = strlen(env->envp[i]) + 1; if (i != env->envp_idx - 1) { + /* @env->envp[] contains pointers to @env->buf[] + * with @env->buflen chars, and we are removing + * variable MODALIAS here pointed by @env->envp[i] + * with length @len as shown below: + * + * 0 @env->buf[] @env->buflen + * --------------------------------------------- + * ^ ^ ^ ^ + * | |-> @len <-| target block | + * @env->envp[0] @env->envp[i] @env->envp[i + 1] + * + * so the "target block" indicated above is moved + * backward by @len, and its right size is + * @env->buflen - (@env->envp[i + 1] - @env->envp[0]). 
+ */ memmove(env->envp[i], env->envp[i + 1], - env->buflen - len); + env->buflen - (env->envp[i + 1] - env->envp[0])); for (j = i; j < env->envp_idx - 1; j++) env->envp[j] = env->envp[j + 1] - len; diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 34d7242d526d..a97897edd964 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -81,4 +81,16 @@ config KUNIT_DEFAULT_ENABLED In most cases this should be left as Y. Only if additional opt-in behavior is needed should this be set to N. +config KUNIT_AUTORUN_ENABLED + bool "Default value of kunit.autorun" + default y + help + Sets the default value of kunit.autorun. If set to N then KUnit + tests will not run after initialization unless kunit.autorun=1 is + passed to the kernel command line. The test can still be run manually + via debugfs interface. + + In most cases this should be left as Y. Only if additional opt-in + behavior is needed should this be set to N. + endif # KUNIT diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 309659a32a78..5aa51978e456 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -2,13 +2,15 @@ obj-$(CONFIG_KUNIT) += kunit.o kunit-objs += test.o \ resource.o \ + user_alloc.o \ static_stub.o \ string-stream.o \ assert.o \ try-catch.o \ executor.o \ attributes.o \ - device.o + device.o \ + platform.o ifeq ($(CONFIG_KUNIT_DEBUGFS),y) kunit-objs += debugfs.o @@ -18,10 +20,12 @@ endif obj-y += hooks.o obj-$(CONFIG_KUNIT_TEST) += kunit-test.o +obj-$(CONFIG_KUNIT_TEST) += platform-test.o # string-stream-test compiles built-in only. ifeq ($(CONFIG_KUNIT_TEST),y) obj-$(CONFIG_KUNIT_TEST) += string-stream-test.o +obj-$(CONFIG_KUNIT_TEST) += assert_test.o endif obj-$(CONFIG_KUNIT_EXAMPLE_TEST) += kunit-example-test.o diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index dd1d633d0fe2..867aa5c4bccf 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -7,6 +7,7 @@ */ #include <kunit/assert.h> #include <kunit/test.h> +#include <kunit/visibility.h> #include "string-stream.h" @@ -30,8 +31,9 @@ void kunit_assert_prologue(const struct kunit_loc *loc, } EXPORT_SYMBOL_GPL(kunit_assert_prologue); -static void kunit_assert_print_msg(const struct va_format *message, - struct string_stream *stream) +VISIBLE_IF_KUNIT +void kunit_assert_print_msg(const struct va_format *message, + struct string_stream *stream) { if (message->fmt) string_stream_add(stream, "\n%pV", message); @@ -89,7 +91,7 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, EXPORT_SYMBOL_GPL(kunit_ptr_not_err_assert_format); /* Checks if `text` is a literal representing `value`, e.g. "5" and 5 */ -static bool is_literal(const char *text, long long value) +VISIBLE_IF_KUNIT bool is_literal(const char *text, long long value) { char *buffer; int len; @@ -166,7 +168,7 @@ EXPORT_SYMBOL_GPL(kunit_binary_ptr_assert_format); /* Checks if KUNIT_EXPECT_STREQ() args were string literals. * Note: `text` will have ""s where as `value` will not. */ -static bool is_str_literal(const char *text, const char *value) +VISIBLE_IF_KUNIT bool is_str_literal(const char *text, const char *value) { int len; @@ -208,10 +210,11 @@ EXPORT_SYMBOL_GPL(kunit_binary_str_assert_format); /* Adds a hexdump of a buffer to a string_stream comparing it with * a second buffer. The different bytes are marked with <>. 
*/ -static void kunit_assert_hexdump(struct string_stream *stream, - const void *buf, - const void *compared_buf, - const size_t len) +VISIBLE_IF_KUNIT +void kunit_assert_hexdump(struct string_stream *stream, + const void *buf, + const void *compared_buf, + const size_t len) { size_t i; const u8 *buf1 = buf; diff --git a/lib/kunit/assert_test.c b/lib/kunit/assert_test.c new file mode 100644 index 000000000000..4a5967712186 --- /dev/null +++ b/lib/kunit/assert_test.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * KUnit test for the assertion formatting functions. + * Author: Ivan Orlov <ivan.orlov0322@gmail.com> + */ +#include <kunit/test.h> +#include "string-stream.h" + +#define TEST_PTR_EXPECTED_BUF_SIZE 32 +#define HEXDUMP_TEST_BUF_LEN 5 +#define ASSERT_TEST_EXPECT_CONTAIN(test, str, substr) KUNIT_EXPECT_TRUE(test, strstr(str, substr)) +#define ASSERT_TEST_EXPECT_NCONTAIN(test, str, substr) KUNIT_EXPECT_FALSE(test, strstr(str, substr)) + +static void kunit_test_is_literal(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, is_literal("5", 5)); + KUNIT_EXPECT_TRUE(test, is_literal("0", 0)); + KUNIT_EXPECT_TRUE(test, is_literal("1234567890", 1234567890)); + KUNIT_EXPECT_TRUE(test, is_literal("-1234567890", -1234567890)); + KUNIT_EXPECT_FALSE(test, is_literal("05", 5)); + KUNIT_EXPECT_FALSE(test, is_literal("", 0)); + KUNIT_EXPECT_FALSE(test, is_literal("-0", 0)); + KUNIT_EXPECT_FALSE(test, is_literal("12#45", 1245)); +} + +static void kunit_test_is_str_literal(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, is_str_literal("\"Hello, World!\"", "Hello, World!")); + KUNIT_EXPECT_TRUE(test, is_str_literal("\"\"", "")); + KUNIT_EXPECT_TRUE(test, is_str_literal("\"\"\"", "\"")); + KUNIT_EXPECT_FALSE(test, is_str_literal("", "")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"", "\"")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"Abacaba", "Abacaba")); + KUNIT_EXPECT_FALSE(test, is_str_literal("Abacaba\"", "Abacaba")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"Abacaba\"", "\"Abacaba\"")); +} + +KUNIT_DEFINE_ACTION_WRAPPER(kfree_wrapper, kfree, const void *); + +/* this function is used to get a "char *" string from the string stream and defer its cleanup */ +static char *get_str_from_stream(struct kunit *test, struct string_stream *stream) +{ + char *str = string_stream_get_string(stream); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, str); + kunit_add_action(test, kfree_wrapper, (void *)str); + + return str; +} + +static void kunit_test_assert_prologue(struct kunit *test) +{ + struct string_stream *stream; + char *str; + const struct kunit_loc location = { + .file = "testfile.c", + .line = 1337, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* Test an expectation fail prologue */ + kunit_assert_prologue(&location, KUNIT_EXPECTATION, stream); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "EXPECTATION"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "testfile.c"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "1337"); + + /* Test an assertion fail prologue */ + string_stream_clear(stream); + kunit_assert_prologue(&location, KUNIT_ASSERTION, stream); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "ASSERTION"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "testfile.c"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "1337"); +} + +/* + * This function accepts an arbitrary count of parameters and generates a va_format struct, + * which can be used to validate 
kunit_assert_print_msg function + */ +static void verify_assert_print_msg(struct kunit *test, + struct string_stream *stream, + char *expected, const char *format, ...) +{ + va_list list; + const struct va_format vformat = { + .fmt = format, + .va = &list, + }; + + va_start(list, format); + string_stream_clear(stream); + kunit_assert_print_msg(&vformat, stream); + KUNIT_EXPECT_STREQ(test, get_str_from_stream(test, stream), expected); +} + +static void kunit_test_assert_print_msg(struct kunit *test) +{ + struct string_stream *stream; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + verify_assert_print_msg(test, stream, "\nTest", "Test"); + verify_assert_print_msg(test, stream, "\nAbacaba -123 234", "%s %d %u", + "Abacaba", -123, 234U); + verify_assert_print_msg(test, stream, "", NULL); +} + +/* + * Further code contains the tests for different assert format functions. + * This helper function accepts the assert format function, executes it and + * validates the result string from the stream by checking that all of the + * substrings exist in the output. + */ +static void validate_assert(assert_format_t format_func, struct kunit *test, + const struct kunit_assert *assert, + struct string_stream *stream, int num_checks, ...) +{ + size_t i; + va_list checks; + char *cur_substr_exp; + struct va_format message = { NULL, NULL }; + + va_start(checks, num_checks); + string_stream_clear(stream); + format_func(assert, &message, stream); + + for (i = 0; i < num_checks; i++) { + cur_substr_exp = va_arg(checks, char *); + ASSERT_TEST_EXPECT_CONTAIN(test, get_str_from_stream(test, stream), cur_substr_exp); + } +} + +static void kunit_test_unary_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_unary_assert un_assert = { + .assert = assert, + .condition = "expr", + .expected_true = true, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + validate_assert(kunit_unary_assert_format, test, &un_assert.assert, + stream, 2, "true", "is false"); + + un_assert.expected_true = false; + validate_assert(kunit_unary_assert_format, test, &un_assert.assert, + stream, 2, "false", "is true"); +} + +static void kunit_test_ptr_not_err_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_ptr_not_err_assert not_err_assert = { + .assert = assert, + .text = "expr", + .value = NULL, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* Value is NULL. The corresponding message should be printed out */ + validate_assert(kunit_ptr_not_err_assert_format, test, + ¬_err_assert.assert, + stream, 1, "null"); + + /* Value is not NULL, but looks like an error pointer. 
Error should be printed out */ + not_err_assert.value = (void *)-12; + validate_assert(kunit_ptr_not_err_assert_format, test, + ¬_err_assert.assert, stream, 2, + "error", "-12"); +} + +static void kunit_test_binary_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_binary_assert_text text = { + .left_text = "1 + 2", + .operation = "==", + .right_text = "2", + }; + const struct kunit_binary_assert binary_assert = { + .assert = assert, + .text = &text, + .left_value = 3, + .right_value = 2, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* + * Printed values should depend on the input we provide: the left text, right text, left + * value and the right value. + */ + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 4, "1 + 2", "2", "3", "=="); + + text.right_text = "4 - 2"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 3, "==", "1 + 2", "4 - 2"); + + text.left_text = "3"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 4, "3", "4 - 2", "2", "=="); + + text.right_text = "2"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 3, "3", "2", "=="); +} + +static void kunit_test_binary_ptr_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + char *addr_var_a, *addr_var_b; + static const void *var_a = (void *)0xDEADBEEF; + static const void *var_b = (void *)0xBADDCAFE; + struct kunit_binary_assert_text text = { + .left_text = "var_a", + .operation = "==", + .right_text = "var_b", + }; + struct kunit_binary_ptr_assert binary_ptr_assert = { + .assert = assert, + .text = &text, + .left_value = var_a, + .right_value = var_b, + }; + + addr_var_a = kunit_kzalloc(test, TEST_PTR_EXPECTED_BUF_SIZE, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addr_var_a); + addr_var_b = kunit_kzalloc(test, TEST_PTR_EXPECTED_BUF_SIZE, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addr_var_b); + /* + * Print the addresses to the buffers first. + * This is necessary as we may have different count of leading zeros in the pointer + * on different architectures. 
+ */ + snprintf(addr_var_a, TEST_PTR_EXPECTED_BUF_SIZE, "%px", var_a); + snprintf(addr_var_b, TEST_PTR_EXPECTED_BUF_SIZE, "%px", var_b); + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + validate_assert(kunit_binary_ptr_assert_format, test, &binary_ptr_assert.assert, + stream, 3, addr_var_a, addr_var_b, "=="); +} + +static void kunit_test_binary_str_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + static const char *var_a = "abacaba"; + static const char *var_b = "kernel"; + struct kunit_binary_assert_text text = { + .left_text = "var_a", + .operation = "==", + .right_text = "var_b", + }; + struct kunit_binary_str_assert binary_str_assert = { + .assert = assert, + .text = &text, + .left_value = var_a, + .right_value = var_b, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + validate_assert(kunit_binary_str_assert_format, test, + &binary_str_assert.assert, + stream, 5, "var_a", "var_b", "\"abacaba\"", + "\"kernel\"", "=="); + + text.left_text = "\"abacaba\""; + validate_assert(kunit_binary_str_assert_format, test, &binary_str_assert.assert, + stream, 4, "\"abacaba\"", "var_b", "\"kernel\"", "=="); + + text.right_text = "\"kernel\""; + validate_assert(kunit_binary_str_assert_format, test, &binary_str_assert.assert, + stream, 3, "\"abacaba\"", "\"kernel\"", "=="); +} + +static const u8 hex_testbuf1[] = { 0x26, 0x74, 0x6b, 0x9c, 0x55, + 0x45, 0x9d, 0x47, 0xd6, 0x47, + 0x2, 0x89, 0x8c, 0x81, 0x94, + 0x12, 0xfe, 0x01 }; +static const u8 hex_testbuf2[] = { 0x26, 0x74, 0x6b, 0x9c, 0x55, + 0x45, 0x9d, 0x47, 0x21, 0x47, + 0xcd, 0x89, 0x24, 0x50, 0x94, + 0x12, 0xba, 0x01 }; +static void kunit_test_assert_hexdump(struct kunit *test) +{ + struct string_stream *stream; + char *str; + size_t i; + char buf[HEXDUMP_TEST_BUF_LEN]; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + /* Check that we are getting output like <xx> for non-matching numbers. */ + kunit_assert_hexdump(stream, hex_testbuf1, hex_testbuf2, sizeof(hex_testbuf1)); + str = get_str_from_stream(test, stream); + for (i = 0; i < sizeof(hex_testbuf1); i++) { + snprintf(buf, HEXDUMP_TEST_BUF_LEN, "<%02x>", hex_testbuf1[i]); + if (hex_testbuf1[i] != hex_testbuf2[i]) + ASSERT_TEST_EXPECT_CONTAIN(test, str, buf); + } + /* We shouldn't get any <xx> numbers when comparing the buffer with itself. 
*/ + string_stream_clear(stream); + kunit_assert_hexdump(stream, hex_testbuf1, hex_testbuf1, sizeof(hex_testbuf1)); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_NCONTAIN(test, str, "<"); + ASSERT_TEST_EXPECT_NCONTAIN(test, str, ">"); +} + +static void kunit_test_mem_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct string_stream *expected_stream; + struct kunit_assert assert = {}; + static const struct kunit_binary_assert_text text = { + .left_text = "hex_testbuf1", + .operation = "==", + .right_text = "hex_testbuf2", + }; + struct kunit_mem_assert mem_assert = { + .assert = assert, + .text = &text, + .left_value = NULL, + .right_value = hex_testbuf2, + .size = sizeof(hex_testbuf1), + }; + + expected_stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, expected_stream); + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* The left value is NULL */ + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 2, "hex_testbuf1", "is not null"); + + /* The right value is NULL, the left value is not NULL */ + mem_assert.left_value = hex_testbuf1; + mem_assert.right_value = NULL; + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 2, "hex_testbuf2", "is not null"); + + /* Both arguments are not null */ + mem_assert.left_value = hex_testbuf1; + mem_assert.right_value = hex_testbuf2; + + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 3, "hex_testbuf1", "hex_testbuf2", "=="); +} + +static struct kunit_case assert_test_cases[] = { + KUNIT_CASE(kunit_test_is_literal), + KUNIT_CASE(kunit_test_is_str_literal), + KUNIT_CASE(kunit_test_assert_prologue), + KUNIT_CASE(kunit_test_assert_print_msg), + KUNIT_CASE(kunit_test_unary_assert_format), + KUNIT_CASE(kunit_test_ptr_not_err_assert_format), + KUNIT_CASE(kunit_test_binary_assert_format), + KUNIT_CASE(kunit_test_binary_ptr_assert_format), + KUNIT_CASE(kunit_test_binary_str_assert_format), + KUNIT_CASE(kunit_test_assert_hexdump), + KUNIT_CASE(kunit_test_mem_assert_format), + {} +}; + +static struct kunit_suite assert_test_suite = { + .name = "kunit-assert", + .test_cases = assert_test_cases, +}; + +kunit_test_suites(&assert_test_suite); diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index d548750a325a..9c326f1837bd 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -145,7 +145,7 @@ static ssize_t debugfs_run(struct file *file, struct inode *f_inode = file->f_inode; struct kunit_suite *suite = (struct kunit_suite *) f_inode->i_private; - __kunit_test_suites_init(&suite, 1); + __kunit_test_suites_init(&suite, 1, true); return count; } @@ -181,7 +181,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) * successfully. 
*/ stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) return; string_stream_set_append_newlines(stream, true); @@ -189,7 +189,7 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) kunit_suite_for_each_test_case(suite, test_case) { stream = alloc_string_stream(GFP_KERNEL); - if (IS_ERR_OR_NULL(stream)) + if (IS_ERR(stream)) goto err; string_stream_set_append_newlines(stream, true); @@ -212,8 +212,11 @@ void kunit_debugfs_create_suite(struct kunit_suite *suite) err: string_stream_destroy(suite->log); - kunit_suite_for_each_test_case(suite, test_case) + suite->log = NULL; + kunit_suite_for_each_test_case(suite, test_case) { string_stream_destroy(test_case->log); + test_case->log = NULL; + } } void kunit_debugfs_destroy_suite(struct kunit_suite *suite) diff --git a/lib/kunit/device.c b/lib/kunit/device.c index 25c81ed465fb..520c1fccee8a 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -89,7 +89,7 @@ struct device_driver *kunit_driver_create(struct kunit *test, const char *name) if (!driver) return ERR_PTR(err); - driver->name = name; + driver->name = kunit_kstrdup_const(test, name, GFP_KERNEL); driver->bus = &kunit_bus_type; driver->owner = THIS_MODULE; @@ -192,8 +192,11 @@ void kunit_device_unregister(struct kunit *test, struct device *dev) const struct device_driver *driver = to_kunit_device(dev)->driver; kunit_release_action(test, device_unregister_wrapper, dev); - if (driver) + if (driver) { + const char *driver_name = driver->name; kunit_release_action(test, driver_unregister_wrapper, (void *)driver); + kunit_kfree_const(test, driver_name); + } } EXPORT_SYMBOL_GPL(kunit_device_unregister); diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 70b9a43cd257..3f39955cb0f1 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -29,6 +29,22 @@ const char *kunit_action(void) return action_param; } +/* + * Run KUnit tests after initialization + */ +#ifdef CONFIG_KUNIT_AUTORUN_ENABLED +static bool autorun_param = true; +#else +static bool autorun_param; +#endif +module_param_named(autorun, autorun_param, bool, 0); +MODULE_PARM_DESC(autorun, "Run KUnit tests after initialization"); + +bool kunit_autorun(void) +{ + return autorun_param; +} + static char *filter_glob_param; static char *filter_param; static char *filter_action_param; @@ -70,32 +86,26 @@ struct kunit_glob_filter { static int kunit_parse_glob_filter(struct kunit_glob_filter *parsed, const char *filter_glob) { - const int len = strlen(filter_glob); const char *period = strchr(filter_glob, '.'); if (!period) { - parsed->suite_glob = kzalloc(len + 1, GFP_KERNEL); + parsed->suite_glob = kstrdup(filter_glob, GFP_KERNEL); if (!parsed->suite_glob) return -ENOMEM; - parsed->test_glob = NULL; - strcpy(parsed->suite_glob, filter_glob); return 0; } - parsed->suite_glob = kzalloc(period - filter_glob + 1, GFP_KERNEL); + parsed->suite_glob = kstrndup(filter_glob, period - filter_glob, GFP_KERNEL); if (!parsed->suite_glob) return -ENOMEM; - parsed->test_glob = kzalloc(len - (period - filter_glob) + 1, GFP_KERNEL); + parsed->test_glob = kstrdup(period + 1, GFP_KERNEL); if (!parsed->test_glob) { kfree(parsed->suite_glob); return -ENOMEM; } - strncpy(parsed->suite_glob, filter_glob, period - filter_glob); - strncpy(parsed->test_glob, period + 1, len - (period - filter_glob)); - return 0; } @@ -266,13 +276,14 @@ free_copy: void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin) { size_t num_suites = suite_set->end - suite_set->start; + bool autorun = 
kunit_autorun(); - if (builtin || num_suites) { + if (autorun && (builtin || num_suites)) { pr_info("KTAP version 1\n"); pr_info("1..%zu\n", num_suites); } - __kunit_test_suites_init(suite_set->start, num_suites); + __kunit_test_suites_init(suite_set->start, num_suites, autorun); } void kunit_exec_list_tests(struct kunit_suite_set *suite_set, bool include_attr) diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c index 3f7f967e3688..f0090c2729cd 100644 --- a/lib/kunit/executor_test.c +++ b/lib/kunit/executor_test.c @@ -286,7 +286,7 @@ static struct kunit_suite *alloc_fake_suite(struct kunit *test, /* We normally never expect to allocate suites, hence the non-const cast. */ suite = kunit_kzalloc(test, sizeof(*suite), GFP_KERNEL); - strncpy((char *)suite->name, suite_name, sizeof(suite->name) - 1); + strscpy((char *)suite->name, suite_name, sizeof(suite->name)); suite->test_cases = test_cases; return suite; diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c index 798924f7cc86..3056d6bc705d 100644 --- a/lib/kunit/kunit-example-test.c +++ b/lib/kunit/kunit-example-test.c @@ -374,4 +374,5 @@ static struct kunit_suite example_init_test_suite = { */ kunit_test_init_section_suites(&example_init_test_suite); +MODULE_DESCRIPTION("Example KUnit test suite"); MODULE_LICENSE("GPL v2"); diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index e3412e0ca399..d9c781c859fd 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -805,6 +805,8 @@ static void kunit_device_driver_test(struct kunit *test) struct device *test_device; struct driver_test_state *test_state = kunit_kzalloc(test, sizeof(*test_state), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_state); + test->priv = test_state; test_driver = kunit_driver_create(test, "my_driver"); @@ -871,4 +873,5 @@ kunit_test_suites(&kunit_try_catch_test_suite, &kunit_resource_test_suite, &kunit_current_test_suite, &kunit_device_test_suite, &kunit_fault_test_suite); +MODULE_DESCRIPTION("KUnit test for core test infrastructure"); MODULE_LICENSE("GPL v2"); diff --git a/lib/kunit/platform-test.c b/lib/kunit/platform-test.c new file mode 100644 index 000000000000..e3debb8fbcef --- /dev/null +++ b/lib/kunit/platform-test.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit test for KUnit platform driver infrastructure. + */ + +#include <linux/platform_device.h> + +#include <kunit/platform_device.h> +#include <kunit/test.h> + +/* + * Test that kunit_platform_device_alloc() creates a platform device. + */ +static void kunit_platform_device_alloc_test(struct kunit *test) +{ + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, + kunit_platform_device_alloc(test, "kunit-platform", 1)); +} + +/* + * Test that kunit_platform_device_add() registers a platform device on the + * platform bus with the proper name and id. + */ +static void kunit_platform_device_add_test(struct kunit *test) +{ + struct platform_device *pdev; + const char *name = "kunit-platform-add"; + const int id = -1; + + pdev = kunit_platform_device_alloc(test, name, id); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + KUNIT_EXPECT_EQ(test, 0, kunit_platform_device_add(test, pdev)); + KUNIT_EXPECT_TRUE(test, dev_is_platform(&pdev->dev)); + KUNIT_EXPECT_STREQ(test, pdev->name, name); + KUNIT_EXPECT_EQ(test, pdev->id, id); +} + +/* + * Test that kunit_platform_device_add() called twice with the same device name + * and id fails the second time and properly cleans up. 
+ */ +static void kunit_platform_device_add_twice_fails_test(struct kunit *test) +{ + struct platform_device *pdev; + const char *name = "kunit-platform-add-2"; + const int id = -1; + + pdev = kunit_platform_device_alloc(test, name, id); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(test, pdev)); + + pdev = kunit_platform_device_alloc(test, name, id); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + KUNIT_EXPECT_NE(test, 0, kunit_platform_device_add(test, pdev)); +} + +static int kunit_platform_device_find_by_name(struct device *dev, const void *data) +{ + return strcmp(dev_name(dev), data) == 0; +} + +/* + * Test that kunit_platform_device_add() cleans up by removing the platform + * device when the test finishes. */ +static void kunit_platform_device_add_cleans_up(struct kunit *test) +{ + struct platform_device *pdev; + const char *name = "kunit-platform-clean"; + const int id = -1; + struct kunit fake; + struct device *dev; + + kunit_init_test(&fake, "kunit_platform_device_add_fake_test", NULL); + KUNIT_ASSERT_EQ(test, fake.status, KUNIT_SUCCESS); + + pdev = kunit_platform_device_alloc(&fake, name, id); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(&fake, pdev)); + dev = bus_find_device(&platform_bus_type, NULL, name, + kunit_platform_device_find_by_name); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + put_device(dev); + + /* Remove pdev */ + kunit_cleanup(&fake); + + /* + * Failing to migrate the kunit_resource would lead to an extra + * put_device() call on the platform device. The best we can do here is + * make sure the device no longer exists on the bus, but if something + * is wrong we'll see a refcount underflow here. We can't test for a + * refcount underflow because the kref matches the lifetime of the + * device which should already be freed and could be used by something + * else. + */ + dev = bus_find_device(&platform_bus_type, NULL, name, + kunit_platform_device_find_by_name); + KUNIT_EXPECT_PTR_EQ(test, NULL, dev); + put_device(dev); +} + +/* + * Test suite for struct platform_device kunit APIs + */ +static struct kunit_case kunit_platform_device_test_cases[] = { + KUNIT_CASE(kunit_platform_device_alloc_test), + KUNIT_CASE(kunit_platform_device_add_test), + KUNIT_CASE(kunit_platform_device_add_twice_fails_test), + KUNIT_CASE(kunit_platform_device_add_cleans_up), + {} +}; + +static struct kunit_suite kunit_platform_device_suite = { + .name = "kunit_platform_device", + .test_cases = kunit_platform_device_test_cases, +}; + +struct kunit_platform_driver_test_context { + struct platform_driver pdrv; + const char *data; +}; + +static const char * const test_data = "test data"; + +static inline struct kunit_platform_driver_test_context * +to_test_context(struct platform_device *pdev) +{ + return container_of(to_platform_driver(pdev->dev.driver), + struct kunit_platform_driver_test_context, + pdrv); +} + +static int kunit_platform_driver_probe(struct platform_device *pdev) +{ + struct kunit_platform_driver_test_context *ctx; + + ctx = to_test_context(pdev); + ctx->data = test_data; + + return 0; +} + +/* Test that kunit_platform_driver_register() registers a driver that probes. 
*/ +static void kunit_platform_driver_register_test(struct kunit *test) +{ + struct platform_device *pdev; + struct kunit_platform_driver_test_context *ctx; + DECLARE_COMPLETION_ONSTACK(comp); + const char *name = "kunit-platform-register"; + + ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + + pdev = kunit_platform_device_alloc(test, name, -1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(test, pdev)); + + ctx->pdrv.probe = kunit_platform_driver_probe; + ctx->pdrv.driver.name = name; + ctx->pdrv.driver.owner = THIS_MODULE; + + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_prepare_wait_for_probe(test, pdev, &comp)); + + KUNIT_EXPECT_EQ(test, 0, kunit_platform_driver_register(test, &ctx->pdrv)); + KUNIT_EXPECT_NE(test, 0, wait_for_completion_timeout(&comp, 3 * HZ)); + KUNIT_EXPECT_STREQ(test, ctx->data, test_data); +} + +/* + * Test that kunit_platform_device_prepare_wait_for_probe() completes the completion + * when the device is already probed. + */ +static void kunit_platform_device_prepare_wait_for_probe_completes_when_already_probed(struct kunit *test) +{ + struct platform_device *pdev; + struct kunit_platform_driver_test_context *ctx; + DECLARE_COMPLETION_ONSTACK(comp); + const char *name = "kunit-platform-wait"; + + ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + + pdev = kunit_platform_device_alloc(test, name, -1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(test, pdev)); + + ctx->pdrv.probe = kunit_platform_driver_probe; + ctx->pdrv.driver.name = name; + ctx->pdrv.driver.owner = THIS_MODULE; + + /* Make sure driver has actually probed */ + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_prepare_wait_for_probe(test, pdev, &comp)); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_driver_register(test, &ctx->pdrv)); + KUNIT_ASSERT_NE(test, 0, wait_for_completion_timeout(&comp, 3 * HZ)); + + reinit_completion(&comp); + KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_prepare_wait_for_probe(test, pdev, &comp)); + + KUNIT_EXPECT_NE(test, 0, wait_for_completion_timeout(&comp, HZ)); +} + +static struct kunit_case kunit_platform_driver_test_cases[] = { + KUNIT_CASE(kunit_platform_driver_register_test), + KUNIT_CASE(kunit_platform_device_prepare_wait_for_probe_completes_when_already_probed), + {} +}; + +/* + * Test suite for struct platform_driver kunit APIs + */ +static struct kunit_suite kunit_platform_driver_suite = { + .name = "kunit_platform_driver", + .test_cases = kunit_platform_driver_test_cases, +}; + +kunit_test_suites( + &kunit_platform_device_suite, + &kunit_platform_driver_suite, +); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test for KUnit platform driver infrastructure"); diff --git a/lib/kunit/platform.c b/lib/kunit/platform.c new file mode 100644 index 000000000000..0b518de26065 --- /dev/null +++ b/lib/kunit/platform.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test managed platform driver + */ + +#include <linux/completion.h> +#include <linux/device/bus.h> +#include <linux/device/driver.h> +#include <linux/platform_device.h> + +#include <kunit/platform_device.h> +#include <kunit/resource.h> + +struct kunit_platform_device_alloc_params { + const char *name; + int id; +}; + +static int kunit_platform_device_alloc_init(struct kunit_resource *res, void *context) +{ + struct kunit_platform_device_alloc_params *params = context; + struct platform_device 
*pdev; + + pdev = platform_device_alloc(params->name, params->id); + if (!pdev) + return -ENOMEM; + + res->data = pdev; + + return 0; +} + +static void kunit_platform_device_alloc_exit(struct kunit_resource *res) +{ + struct platform_device *pdev = res->data; + + platform_device_put(pdev); +} + +/** + * kunit_platform_device_alloc() - Allocate a KUnit test managed platform device + * @test: test context + * @name: device name of platform device to alloc + * @id: identifier of platform device to alloc. + * + * Allocate a test managed platform device. The device is put when the test completes. + * + * Return: Allocated platform device on success, NULL on failure. + */ +struct platform_device * +kunit_platform_device_alloc(struct kunit *test, const char *name, int id) +{ + struct kunit_platform_device_alloc_params params = { + .name = name, + .id = id, + }; + + return kunit_alloc_resource(test, + kunit_platform_device_alloc_init, + kunit_platform_device_alloc_exit, + GFP_KERNEL, &params); +} +EXPORT_SYMBOL_GPL(kunit_platform_device_alloc); + +static void kunit_platform_device_add_exit(struct kunit_resource *res) +{ + struct platform_device *pdev = res->data; + + platform_device_unregister(pdev); +} + +static bool +kunit_platform_device_alloc_match(struct kunit *test, + struct kunit_resource *res, void *match_data) +{ + struct platform_device *pdev = match_data; + + return res->data == pdev && res->free == kunit_platform_device_alloc_exit; +} + +KUNIT_DEFINE_ACTION_WRAPPER(platform_device_unregister_wrapper, + platform_device_unregister, struct platform_device *); +/** + * kunit_platform_device_add() - Register a KUnit test managed platform device + * @test: test context + * @pdev: platform device to add + * + * Register a test managed platform device. The device is unregistered when the + * test completes. + * + * Return: 0 on success, negative errno on failure. + */ +int kunit_platform_device_add(struct kunit *test, struct platform_device *pdev) +{ + struct kunit_resource *res; + int ret; + + ret = platform_device_add(pdev); + if (ret) + return ret; + + res = kunit_find_resource(test, kunit_platform_device_alloc_match, pdev); + if (res) { + /* + * Transfer the reference count of the platform device if it + * was allocated with kunit_platform_device_alloc(). In this + * case, calling platform_device_put() when the test exits from + * kunit_platform_device_alloc_exit() would lead to reference + * count underflow because platform_device_unregister_wrapper() + * calls platform_device_unregister() which also calls + * platform_device_put(). + * + * Usually callers transfer the refcount initialized in + * platform_device_alloc() to platform_device_add() by calling + * platform_device_unregister() when platform_device_add() + * succeeds or platform_device_put() when it fails. KUnit has to + * keep this straight by redirecting the free routine for the + * resource to the right function. Luckily this only has to + * account for the success scenario.
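 *
 * For contrast, the conventional unmanaged pattern that this redirection
 * mirrors is roughly the following (editor's sketch, not part of the diff;
 * "foo" is a made-up device name):
 *
 *	pdev = platform_device_alloc("foo", -1);
 *	if (!pdev)
 *		return -ENOMEM;
 *	ret = platform_device_add(pdev);
 *	if (ret) {
 *		platform_device_put(pdev);	// add failed: drop the allocation reference
 *		return ret;
 *	}
 *	...
 *	platform_device_unregister(pdev);	// later teardown: removes the device and drops the reference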
+ */ + res->free = kunit_platform_device_add_exit; + kunit_put_resource(res); + } else { + ret = kunit_add_action_or_reset(test, platform_device_unregister_wrapper, pdev); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(kunit_platform_device_add); + +struct kunit_platform_device_probe_nb { + struct completion *x; + struct device *dev; + struct notifier_block nb; +}; + +static int kunit_platform_device_probe_notify(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kunit_platform_device_probe_nb *knb; + struct device *dev = data; + + knb = container_of(nb, struct kunit_platform_device_probe_nb, nb); + if (event != BUS_NOTIFY_BOUND_DRIVER || knb->dev != dev) + return NOTIFY_DONE; + + complete(knb->x); + + return NOTIFY_OK; +} + +static void kunit_platform_device_probe_nb_remove(void *nb) +{ + bus_unregister_notifier(&platform_bus_type, nb); +} + +/** + * kunit_platform_device_prepare_wait_for_probe() - Prepare a completion + * variable to wait for a platform device to probe + * @test: test context + * @pdev: platform device to prepare to wait for probe of + * @x: completion variable completed when @dev has probed + * + * Prepare a completion variable @x to wait for @pdev to probe. Waiting on the + * completion forces a preemption, allowing the platform driver to probe. + * + * Example + * + * .. code-block:: c + * + * static int kunit_platform_driver_probe(struct platform_device *pdev) + * { + * return 0; + * } + * + * static void kunit_platform_driver_test(struct kunit *test) + * { + * struct platform_device *pdev; + * struct platform_driver *pdrv; + * DECLARE_COMPLETION_ONSTACK(comp); + * + * pdev = kunit_platform_device_alloc(test, "kunit-platform", -1); + * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + * KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_add(test, pdev)); + * + * pdrv = kunit_kzalloc(test, sizeof(*pdrv), GFP_KERNEL); + * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdrv); + * + * pdrv->probe = kunit_platform_driver_probe; + * pdrv->driver.name = "kunit-platform"; + * pdrv->driver.owner = THIS_MODULE; + * + * KUNIT_ASSERT_EQ(test, 0, kunit_platform_device_prepare_wait_for_probe(test, pdev, &comp)); + * KUNIT_ASSERT_EQ(test, 0, kunit_platform_driver_register(test, pdrv)); + * + * KUNIT_EXPECT_NE(test, 0, wait_for_completion_timeout(&comp, 3 * HZ)); + * } + * + * Return: 0 on success, negative errno on failure. + */ +int kunit_platform_device_prepare_wait_for_probe(struct kunit *test, + struct platform_device *pdev, + struct completion *x) +{ + struct device *dev = &pdev->dev; + struct kunit_platform_device_probe_nb *knb; + bool bound; + + knb = kunit_kzalloc(test, sizeof(*knb), GFP_KERNEL); + if (!knb) + return -ENOMEM; + + knb->nb.notifier_call = kunit_platform_device_probe_notify; + knb->dev = dev; + knb->x = x; + + device_lock(dev); + bound = device_is_bound(dev); + if (bound) { + device_unlock(dev); + complete(x); + kunit_kfree(test, knb); + return 0; + } + + bus_register_notifier(&platform_bus_type, &knb->nb); + device_unlock(&pdev->dev); + + return kunit_add_action_or_reset(test, kunit_platform_device_probe_nb_remove, &knb->nb); +} +EXPORT_SYMBOL_GPL(kunit_platform_device_prepare_wait_for_probe); + +KUNIT_DEFINE_ACTION_WRAPPER(platform_driver_unregister_wrapper, + platform_driver_unregister, struct platform_driver *); +/** + * kunit_platform_driver_register() - Register a KUnit test managed platform driver + * @test: test context + * @drv: platform driver to register + * + * Register a test managed platform driver. 
This allows callers to embed the + * @drv in a container structure and use container_of() in the probe function + * to pass information to KUnit tests. + * + * Example + * + * .. code-block:: c + * + * struct kunit_test_context { + * struct platform_driver pdrv; + * const char *data; + * }; + * + * static inline struct kunit_test_context * + * to_test_context(struct platform_device *pdev) + * { + * return container_of(to_platform_driver(pdev->dev.driver), + * struct kunit_test_context, + * pdrv); + * } + * + * static int kunit_platform_driver_probe(struct platform_device *pdev) + * { + * struct kunit_test_context *ctx; + * + * ctx = to_test_context(pdev); + * ctx->data = "test data"; + * + * return 0; + * } + * + * static void kunit_platform_driver_test(struct kunit *test) + * { + * struct kunit_test_context *ctx; + * + * ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL); + * KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + * + * ctx->pdrv.probe = kunit_platform_driver_probe; + * ctx->pdrv.driver.name = "kunit-platform"; + * ctx->pdrv.driver.owner = THIS_MODULE; + * + * KUNIT_EXPECT_EQ(test, 0, kunit_platform_driver_register(test, &ctx->pdrv)); + * <... wait for driver to probe ...> + * KUNIT_EXPECT_STREQ(test, ctx->data, "test data"); + * } + * + * Return: 0 on success, negative errno on failure. + */ +int kunit_platform_driver_register(struct kunit *test, + struct platform_driver *drv) +{ + int ret; + + ret = platform_driver_register(drv); + if (ret) + return ret; + + return kunit_add_action_or_reset(test, platform_driver_unregister_wrapper, drv); +} +EXPORT_SYMBOL_GPL(kunit_platform_driver_register); diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c index 7511442ea98f..7734e33156f9 100644 --- a/lib/kunit/string-stream-test.c +++ b/lib/kunit/string-stream-test.c @@ -9,6 +9,7 @@ #include <kunit/static_stub.h> #include <kunit/test.h> #include <linux/ktime.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/timekeeping.h> diff --git a/lib/kunit/test.c b/lib/kunit/test.c index b8514dbb337c..146d1b48a096 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -708,7 +708,8 @@ bool kunit_enabled(void) return enable_param; } -int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_suites) +int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_suites, + bool run_tests) { unsigned int i; @@ -731,7 +732,8 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_ for (i = 0; i < num_suites; i++) { kunit_init_suite(suites[i]); - kunit_run_tests(suites[i]); + if (run_tests) + kunit_run_tests(suites[i]); } static_branch_dec(&kunit_running); @@ -874,6 +876,25 @@ void kunit_kfree(struct kunit *test, const void *ptr) } EXPORT_SYMBOL_GPL(kunit_kfree); +void kunit_kfree_const(struct kunit *test, const void *x) +{ +#if !IS_MODULE(CONFIG_KUNIT) + if (!is_kernel_rodata((unsigned long)x)) +#endif + kunit_kfree(test, x); +} +EXPORT_SYMBOL_GPL(kunit_kfree_const); + +const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp) +{ +#if !IS_MODULE(CONFIG_KUNIT) + if (is_kernel_rodata((unsigned long)str)) + return str; +#endif + return kunit_kstrdup(test, str, gfp); +} +EXPORT_SYMBOL_GPL(kunit_kstrdup_const); + void kunit_cleanup(struct kunit *test) { struct kunit_resource *res; @@ -938,4 +959,5 @@ static void __exit kunit_exit(void) } module_exit(kunit_exit); +MODULE_DESCRIPTION("Base unit test (KUnit) API"); MODULE_LICENSE("GPL v2"); diff --git a/lib/kunit/user_alloc.c 
b/lib/kunit/user_alloc.c new file index 000000000000..46951be018be --- /dev/null +++ b/lib/kunit/user_alloc.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit userspace memory allocation resource management. + */ +#include <kunit/resource.h> +#include <kunit/test.h> +#include <linux/kthread.h> +#include <linux/mm.h> + +struct kunit_vm_mmap_resource { + unsigned long addr; + size_t size; +}; + +/* vm_mmap() arguments */ +struct kunit_vm_mmap_params { + struct file *file; + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flag; + unsigned long offset; +}; + +/* Create and attach a new mm if it doesn't already exist. */ +static int kunit_attach_mm(void) +{ + struct mm_struct *mm; + + if (current->mm) + return 0; + + /* arch_pick_mmap_layout() is only sane with MMU systems. */ + if (!IS_ENABLED(CONFIG_MMU)) + return -EINVAL; + + mm = mm_alloc(); + if (!mm) + return -ENOMEM; + + /* Define the task size. */ + mm->task_size = TASK_SIZE; + + /* Make sure we can allocate new VMAs. */ + arch_pick_mmap_layout(mm, &current->signal->rlim[RLIMIT_STACK]); + + /* Attach the mm. It will be cleaned up when the process dies. */ + kthread_use_mm(mm); + + return 0; +} + +static int kunit_vm_mmap_init(struct kunit_resource *res, void *context) +{ + struct kunit_vm_mmap_params *p = context; + struct kunit_vm_mmap_resource vres; + int ret; + + ret = kunit_attach_mm(); + if (ret) + return ret; + + vres.size = p->len; + vres.addr = vm_mmap(p->file, p->addr, p->len, p->prot, p->flag, p->offset); + if (!vres.addr) + return -ENOMEM; + res->data = kmemdup(&vres, sizeof(vres), GFP_KERNEL); + if (!res->data) { + vm_munmap(vres.addr, vres.size); + return -ENOMEM; + } + + return 0; +} + +static void kunit_vm_mmap_free(struct kunit_resource *res) +{ + struct kunit_vm_mmap_resource *vres = res->data; + + /* + * Since this is executed from the test monitoring process, + * the test's mm has already been torn down. We don't need + * to run vm_munmap(vres->addr, vres->size), only clean up + * the vres.
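 *
 * (Editor's sketch, not part of the diff: a test would typically use the
 * kunit_vm_mmap() helper defined below to map anonymous memory and then rely
 * on this resource machinery for teardown, e.g.
 *
 *	unsigned long addr;
 *
 *	addr = kunit_vm_mmap(test, NULL, 0, PAGE_SIZE,
 *			     PROT_READ | PROT_WRITE,
 *			     MAP_ANONYMOUS | MAP_PRIVATE, 0);
 *	KUNIT_ASSERT_NE(test, addr, 0);
 *
 * where the protection and mapping flags are only an illustrative choice.)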
+ */ + + kfree(vres); + res->data = NULL; +} + +unsigned long kunit_vm_mmap(struct kunit *test, struct file *file, + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flag, + unsigned long offset) +{ + struct kunit_vm_mmap_params params = { + .file = file, + .addr = addr, + .len = len, + .prot = prot, + .flag = flag, + .offset = offset, + }; + struct kunit_vm_mmap_resource *vres; + + vres = kunit_alloc_resource(test, + kunit_vm_mmap_init, + kunit_vm_mmap_free, + GFP_KERNEL, + &params); + if (vres) + return vres->addr; + return 0; +} +EXPORT_SYMBOL_GPL(kunit_vm_mmap); + +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c index 27e0c8ee71d8..48342736d016 100644 --- a/lib/kunit_iov_iter.c +++ b/lib/kunit_iov_iter.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/uio.h> #include <linux/bvec.h> +#include <linux/folio_queue.h> #include <kunit/test.h> MODULE_DESCRIPTION("iov_iter testing"); @@ -56,7 +57,7 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); *ppages = pages; - got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages); + got = alloc_pages_bulk(GFP_KERNEL, npages, pages); if (got != npages) { release_pages(pages, got); KUNIT_ASSERT_EQ(test, got, npages); @@ -362,6 +363,179 @@ stop: KUNIT_SUCCEED(test); } +static void iov_kunit_destroy_folioq(void *data) +{ + struct folio_queue *folioq, *next; + + for (folioq = data; folioq; folioq = next) { + next = folioq->next; + for (int i = 0; i < folioq_nr_slots(folioq); i++) + if (folioq_folio(folioq, i)) + folio_put(folioq_folio(folioq, i)); + kfree(folioq); + } +} + +static void __init iov_kunit_load_folioq(struct kunit *test, + struct iov_iter *iter, int dir, + struct folio_queue *folioq, + struct page **pages, size_t npages) +{ + struct folio_queue *p = folioq; + size_t size = 0; + int i; + + for (i = 0; i < npages; i++) { + if (folioq_full(p)) { + p->next = kzalloc(sizeof(struct folio_queue), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p->next); + folioq_init(p->next, 0); + p->next->prev = p; + p = p->next; + } + folioq_append(p, page_folio(pages[i])); + size += PAGE_SIZE; + } + iov_iter_folio_queue(iter, dir, folioq, 0, 0, size); +} + +static struct folio_queue *iov_kunit_create_folioq(struct kunit *test) +{ + struct folio_queue *folioq; + + folioq = kzalloc(sizeof(struct folio_queue), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, folioq); + kunit_add_action_or_reset(test, iov_kunit_destroy_folioq, folioq); + folioq_init(folioq, 0); + return folioq; +} + +/* + * Test copying to a ITER_FOLIOQ-type iterator.
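 * (Editor's note, not part of the diff: an ITER_FOLIOQ iterator walks a chain
 * of folio_queue segments, each holding a small array of folios, instead of a
 * kvec or bio_vec array; the helpers above build such a chain from a plain
 * page array so that the copy and extraction paths can be exercised.)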
+ */ +static void __init iov_kunit_copy_to_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct folio_queue *folioq; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, patt; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + for (i = 0; i < bufsize; i++) + scratch[i] = pattern(i); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + memset(buffer, 0, bufsize); + + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, READ, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, pr->from); + copied = copy_to_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE); + i += size; + if (test->status == KUNIT_FAILURE) + goto stop; + } + + /* Build the expected image in the scratch buffer. */ + patt = 0; + memset(scratch, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) + for (i = pr->from; i < pr->to; i++) + scratch[i] = pattern(patt++); + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i); + if (buffer[i] != scratch[i]) + return; + } + +stop: + KUNIT_SUCCEED(test); +} + +/* + * Test copying from a ITER_FOLIOQ-type iterator. + */ +static void __init iov_kunit_copy_from_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct iov_iter iter; + struct folio_queue *folioq; + struct page **spages, **bpages; + u8 *scratch, *buffer; + size_t bufsize, npages, size, copied; + int i, j; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + buffer = iov_kunit_create_buffer(test, &bpages, npages); + for (i = 0; i < bufsize; i++) + buffer[i] = pattern(i); + + scratch = iov_kunit_create_buffer(test, &spages, npages); + memset(scratch, 0, bufsize); + + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + i = 0; + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + size = pr->to - pr->from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, pr->from); + copied = copy_from_iter(scratch + i, size, &iter); + + KUNIT_EXPECT_EQ(test, copied, size); + KUNIT_EXPECT_EQ(test, iter.count, 0); + KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE); + i += size; + } + + /* Build the expected image in the main buffer. */ + i = 0; + memset(buffer, 0, bufsize); + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + for (j = pr->from; j < pr->to; j++) { + buffer[i++] = pattern(j); + if (i >= bufsize) + goto stop; + } + } +stop: + + /* Compare the images */ + for (i = 0; i < bufsize; i++) { + KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i); + if (scratch[i] != buffer[i]) + return; + } + + KUNIT_SUCCEED(test); +} + static void iov_kunit_destroy_xarray(void *data) { struct xarray *xarray = data; @@ -678,6 +852,85 @@ stop: } /* + * Test the extraction of ITER_FOLIOQ-type iterators. 
+ */ +static void __init iov_kunit_extract_pages_folioq(struct kunit *test) +{ + const struct kvec_test_range *pr; + struct folio_queue *folioq; + struct iov_iter iter; + struct page **bpages, *pagelist[8], **pages = pagelist; + ssize_t len; + size_t bufsize, size = 0, npages; + int i, from; + + bufsize = 0x100000; + npages = bufsize / PAGE_SIZE; + + folioq = iov_kunit_create_folioq(test); + + iov_kunit_create_buffer(test, &bpages, npages); + iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages); + + for (pr = kvec_test_ranges; pr->from >= 0; pr++) { + from = pr->from; + size = pr->to - from; + KUNIT_ASSERT_LE(test, pr->to, bufsize); + + iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to); + iov_iter_advance(&iter, from); + + do { + size_t offset0 = LONG_MAX; + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) + pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL; + + len = iov_iter_extract_pages(&iter, &pages, 100 * 1024, + ARRAY_SIZE(pagelist), 0, &offset0); + KUNIT_EXPECT_GE(test, len, 0); + if (len < 0) + break; + KUNIT_EXPECT_LE(test, len, size); + KUNIT_EXPECT_EQ(test, iter.count, size - len); + if (len == 0) + break; + size -= len; + KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0); + KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE); + + for (i = 0; i < ARRAY_SIZE(pagelist); i++) { + struct page *p; + ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); + int ix; + + KUNIT_ASSERT_GE(test, part, 0); + ix = from / PAGE_SIZE; + KUNIT_ASSERT_LT(test, ix, npages); + p = bpages[ix]; + KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p); + KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE); + from += part; + len -= part; + KUNIT_ASSERT_GE(test, len, 0); + if (len == 0) + break; + offset0 = 0; + } + + if (test->status == KUNIT_FAILURE) + goto stop; + } while (iov_iter_count(&iter) > 0); + + KUNIT_EXPECT_EQ(test, size, 0); + KUNIT_EXPECT_EQ(test, iter.count, 0); + } + +stop: + KUNIT_SUCCEED(test); +} + +/* * Test the extraction of ITER_XARRAY-type iterators. */ static void __init iov_kunit_extract_pages_xarray(struct kunit *test) @@ -761,10 +1014,13 @@ static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_copy_from_kvec), KUNIT_CASE(iov_kunit_copy_to_bvec), KUNIT_CASE(iov_kunit_copy_from_bvec), + KUNIT_CASE(iov_kunit_copy_to_folioq), + KUNIT_CASE(iov_kunit_copy_from_folioq), KUNIT_CASE(iov_kunit_copy_to_xarray), KUNIT_CASE(iov_kunit_copy_from_xarray), KUNIT_CASE(iov_kunit_extract_pages_kvec), KUNIT_CASE(iov_kunit_extract_pages_bvec), + KUNIT_CASE(iov_kunit_extract_pages_folioq), KUNIT_CASE(iov_kunit_extract_pages_xarray), {} }; diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c deleted file mode 100644 index 649e687413a0..000000000000 --- a/lib/libcrc32c.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CRC32C - *@Article{castagnoli-crc, - * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, - * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 - * and 32 Parity Bits}}, - * journal = IEEE Transactions on Communication, - * year = {1993}, - * volume = {41}, - * number = {6}, - * pages = {}, - * month = {June}, - *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. 
Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * <endoflist> - * - * Copyright (c) 2004 Cisco Systems, Inc. - */ - -#include <crypto/hash.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/crc32c.h> - -static struct crypto_shash *tfm; - -u32 crc32c(u32 crc, const void *address, unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, tfm); - u32 ret, *ctx = (u32 *)shash_desc_ctx(shash); - int err; - - shash->tfm = tfm; - *ctx = crc; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); - - ret = *ctx; - barrier_data(ctx); - return ret; -} - -EXPORT_SYMBOL(crc32c); - -static int __init libcrc32c_mod_init(void) -{ - tfm = crypto_alloc_shash("crc32c", 0, 0); - return PTR_ERR_OR_ZERO(tfm); -} - -static void __exit libcrc32c_mod_fini(void) -{ - crypto_free_shash(tfm); -} - -module_init(libcrc32c_mod_init); -module_exit(libcrc32c_mod_fini); - -MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>"); -MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); -MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); diff --git a/lib/list-test.c b/lib/list-test.c index 0cc27de9cec8..9135cdc1bb39 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -102,6 +102,8 @@ static void list_test_list_replace(struct kunit *test) /* now: [list] -> a_new -> b */ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new); KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new); + KUNIT_EXPECT_PTR_EQ(test, a_new.next, &b); + KUNIT_EXPECT_PTR_EQ(test, a_new.prev, &list); } static void list_test_list_replace_init(struct kunit *test) @@ -118,6 +120,8 @@ static void list_test_list_replace_init(struct kunit *test) /* now: [list] -> a_new -> b */ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new); KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new); + KUNIT_EXPECT_PTR_EQ(test, a_new.next, &b); + KUNIT_EXPECT_PTR_EQ(test, a_new.prev, &list); /* check a_old is empty (initialized) */ KUNIT_EXPECT_TRUE(test, list_empty_careful(&a_old)); @@ -408,6 +412,8 @@ static void list_test_list_cut_position(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_cut_before(struct kunit *test) @@ -436,6 +442,8 @@ static void list_test_list_cut_before(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); i++; } + + KUNIT_EXPECT_EQ(test, i, 3); } static void list_test_list_splice(struct kunit *test) @@ -1201,12 +1209,6 @@ static struct kunit_suite hlist_test_module = { }; -struct klist_test_struct { - int data; - struct klist klist; - struct klist_node klist_node; -}; - static int node_count; static struct klist_node *last_node; @@ -1499,4 +1501,5 @@ static struct kunit_suite klist_test_module = { kunit_test_suites(&list_test_module, &hlist_test_module, &klist_test_module); +MODULE_DESCRIPTION("KUnit test for the Kernel Linked-list structures"); MODULE_LICENSE("GPL v2"); diff --git a/lib/list_debug.c b/lib/list_debug.c index db602417febf..ee7eeeb8f92c 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -22,17 +22,17 @@ __list_valid_slowpath bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next) { - if (CHECK_DATA_CORRUPTION(prev == NULL, + if (CHECK_DATA_CORRUPTION(prev == NULL, NULL, "list_add corruption. prev is NULL.\n") || - CHECK_DATA_CORRUPTION(next == NULL, + CHECK_DATA_CORRUPTION(next == NULL, NULL, "list_add corruption. 
next is NULL.\n") || - CHECK_DATA_CORRUPTION(next->prev != prev, + CHECK_DATA_CORRUPTION(next->prev != prev, next, "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n", prev, next->prev, next) || - CHECK_DATA_CORRUPTION(prev->next != next, + CHECK_DATA_CORRUPTION(prev->next != next, prev, "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n", next, prev->next, prev) || - CHECK_DATA_CORRUPTION(new == prev || new == next, + CHECK_DATA_CORRUPTION(new == prev || new == next, NULL, "list_add double add: new=%px, prev=%px, next=%px.\n", new, prev, next)) return false; @@ -49,20 +49,20 @@ bool __list_del_entry_valid_or_report(struct list_head *entry) prev = entry->prev; next = entry->next; - if (CHECK_DATA_CORRUPTION(next == NULL, + if (CHECK_DATA_CORRUPTION(next == NULL, NULL, "list_del corruption, %px->next is NULL\n", entry) || - CHECK_DATA_CORRUPTION(prev == NULL, + CHECK_DATA_CORRUPTION(prev == NULL, NULL, "list_del corruption, %px->prev is NULL\n", entry) || - CHECK_DATA_CORRUPTION(next == LIST_POISON1, + CHECK_DATA_CORRUPTION(next == LIST_POISON1, next, "list_del corruption, %px->next is LIST_POISON1 (%px)\n", entry, LIST_POISON1) || - CHECK_DATA_CORRUPTION(prev == LIST_POISON2, + CHECK_DATA_CORRUPTION(prev == LIST_POISON2, prev, "list_del corruption, %px->prev is LIST_POISON2 (%px)\n", entry, LIST_POISON2) || - CHECK_DATA_CORRUPTION(prev->next != entry, + CHECK_DATA_CORRUPTION(prev->next != entry, prev, "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n", entry, prev->next, prev) || - CHECK_DATA_CORRUPTION(next->prev != entry, + CHECK_DATA_CORRUPTION(next->prev != entry, next, "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n", entry, next->prev, next)) return false; diff --git a/lib/list_sort.c b/lib/list_sort.c index 0fb59e92ca2d..a310ecb7ccc0 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/bug.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> @@ -111,6 +108,13 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, * and list_sort is a stable sort, so it is not necessary to distinguish * the @a < @b and @a == @b cases. * + * The comparison function must adhere to specific mathematical properties + * to ensure correct and stable sorting: + * - Antisymmetry: cmp(@a, @b) must return the opposite sign of + * cmp(@b, @a). + * - Transitivity: if cmp(@a, @b) <= 0 and cmp(@b, @c) <= 0, then + * cmp(@a, @c) <= 0. + * * This is compatible with two styles of @cmp function: * - The traditional style which returns <0 / =0 / >0, or * - Returning a boolean 0/1. diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6f6a5fc85b42..ed99344317f5 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -1720,8 +1720,6 @@ static void ww_test_normal(void) { int ret; - WWAI(&t); - /* * None of the ww_mutex codepaths should be taken in the 'normal' * mutex calls. 
The easiest way to verify this is by using the @@ -1770,6 +1768,8 @@ static void ww_test_normal(void) ww_mutex_base_unlock(&o.base); WARN_ON(o.ctx != (void *)~0UL); + WWAI(&t); + /* nest_lock */ o.ctx = (void *)~0UL; ww_mutex_base_lock_nest_lock(&o.base, &t); @@ -2710,6 +2710,43 @@ static void local_lock_3B(void) } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return rwsem->dep_map.name; +} +#else +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return NULL; +} +#endif + +static void test_lockdep_set_subclass_name(void) +{ + const char *name_before = rw_semaphore_lockdep_name(&rwsem_X1); + const char *name_after; + + lockdep_set_subclass(&rwsem_X1, 1); + name_after = rw_semaphore_lockdep_name(&rwsem_X1); + DEBUG_LOCKS_WARN_ON(name_before != name_after); +} + +/* + * lockdep_set_subclass() should reuse the existing lock class name instead + * of creating a new one. + */ +static void lockdep_set_subclass_name_test(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | lockdep_set_subclass() name test|\n"); + printk(" -----------------------------------\n"); + + print_testname("compare name before and after"); + dotest(test_lockdep_set_subclass_name, SUCCESS, LOCKTYPE_RWSEM); + pr_cont("\n"); +} + static void local_lock_tests(void) { printk(" --------------------------------------------------------------------------\n"); @@ -2920,6 +2957,8 @@ void locking_selftest(void) dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL); pr_cont("\n"); + lockdep_set_subclass_name_test(); + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; diff --git a/lib/lockref.c b/lib/lockref.c index 2afe4c5d8919..5d8e3ef3860e 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -58,23 +58,22 @@ EXPORT_SYMBOL(lockref_get); * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ -int lockref_get_not_zero(struct lockref *lockref) +bool lockref_get_not_zero(struct lockref *lockref) { - int retval; + bool retval = false; CMPXCHG_LOOP( new.count++; if (old.count <= 0) - return 0; + return false; , - return 1; + return true; ); spin_lock(&lockref->lock); - retval = 0; if (lockref->count > 0) { lockref->count++; - retval = 1; + retval = true; } spin_unlock(&lockref->lock); return retval; @@ -82,39 +81,11 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_put_not_zero - Decrements count unless count <= 1 before decrement - * @lockref: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count would become zero - */ -int lockref_put_not_zero(struct lockref *lockref) -{ - int retval; - - CMPXCHG_LOOP( - new.count--; - if (old.count <= 1) - return 0; - , - return 1; - ); - - spin_lock(&lockref->lock); - retval = 0; - if (lockref->count > 1) { - lockref->count--; - retval = 1; - } - spin_unlock(&lockref->lock); - return retval; -} -EXPORT_SYMBOL(lockref_put_not_zero); - -/** * lockref_put_return - Decrement reference count if possible * @lockref: pointer to lockref structure * * Decrement the reference count and return the new value. - * If the lockref was dead or locked, return an error. + * If the lockref was dead or locked, return -1. 
*/ int lockref_put_return(struct lockref *lockref) { @@ -134,22 +105,22 @@ EXPORT_SYMBOL(lockref_put_return); * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ -int lockref_put_or_lock(struct lockref *lockref) +bool lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 1) break; , - return 1; + return true; ); spin_lock(&lockref->lock); if (lockref->count <= 1) - return 0; + return false; lockref->count--; spin_unlock(&lockref->lock); - return 1; + return true; } EXPORT_SYMBOL(lockref_put_or_lock); @@ -169,23 +140,22 @@ EXPORT_SYMBOL(lockref_mark_dead); * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if lockref was dead */ -int lockref_get_not_dead(struct lockref *lockref) +bool lockref_get_not_dead(struct lockref *lockref) { - int retval; + bool retval = false; CMPXCHG_LOOP( new.count++; if (old.count < 0) - return 0; + return false; , - return 1; + return true; ); spin_lock(&lockref->lock); - retval = 0; if (lockref->count >= 0) { lockref->count++; - retval = 1; + retval = true; } spin_unlock(&lockref->lock); return retval; diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 2ea564a40064..e29496a38d06 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range) * * Traverse the io_range_list to find the registered node for @fwnode. */ -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode) { struct logic_pio_hwaddr *range, *found_range = NULL; @@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio) * * Returns Logical PIO value if successful, ~0UL otherwise */ -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t addr, resource_size_t size) { struct logic_pio_hwaddr *range; diff --git a/lib/lru_cache.c b/lib/lru_cache.c index b3d9187611de..9e0d469c7658 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -243,7 +243,7 @@ static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, BUG_ON(!lc); BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { + hlist_for_each_entry(e, lc_hash_slot(lc, enr), collision) { /* "about to be changed" elements, pending transaction commit, * are hashed by their "new number". "Normal" elements have * lc_number == lc_new_number. 
*/ @@ -303,7 +303,7 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) BUG_ON(e->refcnt); e->lc_number = e->lc_new_number = LC_FREE; - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); list_move(&e->list, &lc->free); RETURN(); } @@ -324,9 +324,9 @@ static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned n PARANOIA_LC_ELEMENT(lc, e); e->lc_new_number = new_number; - if (!hlist_unhashed(&e->colision)) - __hlist_del(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + if (!hlist_unhashed(&e->collision)) + __hlist_del(&e->collision); + hlist_add_head(&e->collision, lc_hash_slot(lc, new_number)); list_move(&e->list, &lc->to_be_changed); return e; diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index 90bb67994688..2a397bb2c661 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -33,11 +33,10 @@ /*-************************************ * Dependencies **************************************/ -#include <linux/lz4.h> #include "lz4defs.h" #include <linux/module.h> #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> static const int LZ4_minLength = (MFLIMIT + 1); static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1)); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 59fe69a63800..3a2cd9acada4 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -33,12 +33,11 @@ /*-************************************ * Dependencies **************************************/ -#include <linux/lz4.h> #include "lz4defs.h" #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /*-***************************** * Decompression functions diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 330aa539b46e..17277ec16919 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -35,10 +35,11 @@ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/bitops.h> #include <linux/string.h> /* memset, memcpy */ +#include <linux/lz4.h> #define FORCE_INLINE __always_inline @@ -92,8 +93,7 @@ typedef uintptr_t uptrval; #define MB (1 << 20) #define GB (1U << 30) -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) +#define MAX_DISTANCE LZ4_DISTANCE_MAX #define STEPSIZE sizeof(size_t) #define ML_BITS 4 diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index e7ac8694b797..91936dc3d14b 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -34,7 +34,6 @@ /*-************************************ * Dependencies **************************************/ -#include <linux/lz4.h> #include "lz4defs.h" #include <linux/module.h> #include <linux/kernel.h> @@ -621,6 +620,7 @@ void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) LZ4_streamHCPtr->internal_donotuse.base = NULL; LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel; } +EXPORT_SYMBOL(LZ4_resetStreamHC); int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, const char *dictionary, diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 9d31e7126606..47d6d43ea957 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -14,7 +14,7 @@ #include <linux/module.h> #include <linux/kernel.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/lzo.h> #include "lzodefs.h" diff --git a/lib/lzo/lzo1x_decompress_safe.c 
b/lib/lzo/lzo1x_decompress_safe.c index 7892a40cf765..c94f4928e188 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/kernel.h> #endif -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/lzo.h> #include "lzodefs.h" diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 2d7d27e6ae3c..f7153ade1be5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,21 @@ #define CREATE_TRACE_POINTS #include <trace/events/maple_tree.h> +/* + * Kernel pointer hashing renders much of the maple tree dump useless as tagged + * pointers get hashed to arbitrary values. + * + * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is + * permissible to bypass this. Otherwise remain cautious and retain the hashing. + * + * Userland doesn't know about %px so also use %p there. + */ +#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE) +#define PTR_FMT "%px" +#else +#define PTR_FMT "%p" +#endif + #define MA_ROOT_PARENT 1 /* @@ -120,7 +135,6 @@ static const unsigned char mt_min_slots[] = { #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) struct maple_big_node { - struct maple_pnode *parent; unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; union { struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; @@ -348,17 +362,17 @@ static inline void *mte_safe_root(const struct maple_enode *node) return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); } -static inline void *mte_set_full(const struct maple_enode *node) +static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) { return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); } -static inline void *mte_clear_full(const struct maple_enode *node) +static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) { return (void *)((unsigned long)node | MAPLE_ENODE_NULL); } -static inline bool mte_has_null(const struct maple_enode *node) +static inline bool __maybe_unused mte_has_null(const struct maple_enode *node) { return (unsigned long)node & MAPLE_ENODE_NULL; } @@ -474,6 +488,7 @@ enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) /* * mas_set_parent() - Set the parent node and encode the slot + * @mas: The maple state * @enode: The encoded maple node. * @parent: The encoded maple node that is the parent of @enode. * @slot: The slot that @enode resides in @parent. @@ -534,7 +549,7 @@ unsigned int mte_parent_slot(const struct maple_enode *enode) /* * mte_parent() - Get the parent of @node. - * @node: The encoded maple node. + * @enode: The encoded maple node. * * Return: The parent maple node. */ @@ -641,8 +656,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas) /* * ma_pivots() - Get a pointer to the maple node pivots. - * @node - the maple node - * @type - the node type + * @node: the maple node + * @type: the node type * * In the event of a dead node, this array may be %NULL * @@ -665,8 +680,8 @@ static inline unsigned long *ma_pivots(struct maple_node *node, /* * ma_gaps() - Get a pointer to the maple node gaps. - * @node - the maple node - * @type - the node type + * @node: the maple node + * @type: the node type * * Return: A pointer to the maple node gaps */ @@ -880,8 +895,6 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, * @mt: The maple tree * @mn: The maple node * @type: The maple node type - * @offset: The offset of the highest sub-gap in this node. 
- * @end: The end of the data in this node. */ static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, enum maple_type type) @@ -939,7 +952,7 @@ static inline unsigned char ma_meta_gap(struct maple_node *mn) /* * ma_set_meta_gap() - Set the largest gap location in a nodes metadata * @mn: The maple node - * @mn: The maple node type + * @mt: The maple node type * @offset: The location of the largest gap. */ static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, @@ -953,8 +966,8 @@ static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, /* * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. - * @mat - the ma_topiary, a linked list of dead nodes. - * @dead_enode - the node to be marked as dead and added to the tail of the list + * @mat: the ma_topiary, a linked list of dead nodes. + * @dead_enode: the node to be marked as dead and added to the tail of the list * * Add the @dead_enode to the linked list in @mat. */ @@ -977,8 +990,8 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, bool free); /* * mas_mat_destroy() - Free all nodes and subtrees in a dead list. - * @mas - the maple state - * @mat - the ma_topiary linked list of dead nodes to free. + * @mas: the maple state + * @mat: the ma_topiary linked list of dead nodes to free. * * Destroy walk a dead list. */ @@ -999,7 +1012,7 @@ static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) } /* * mas_descend() - Descend into the slot stored in the ma_state. - * @mas - the maple state. + * @mas: the maple state. * * Note: Not RCU safe, only use in write side or debug code. */ @@ -1194,19 +1207,17 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) reuse->request_count = 0; reuse->node_count = 0; - if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { - head->slot[head->node_count++] = reuse; - head->total++; - goto done; - } - - reuse->total = 1; - if ((head) && !((unsigned long)head & 0x1)) { + if (count) { + if (head->node_count < MAPLE_ALLOC_SLOTS) { + head->slot[head->node_count++] = reuse; + head->total++; + goto done; + } reuse->slot[0] = head; reuse->node_count = 1; - reuse->total += head->total; } + reuse->total = count + 1; mas->alloc = reuse; done: if (requested > 1) @@ -1252,11 +1263,11 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) mas->alloc = node; node->total = ++allocated; + node->request_count = 0; requested--; } node = mas->alloc; - node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS - node->node_count; slots = (void **)&node->slot[node->node_count]; @@ -1272,7 +1283,10 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) node->node_count += count; allocated += count; - node = node->slot[0]; + /* find a non-full node*/ + do { + node = node->slot[0]; + } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); requested -= count; } mas->alloc->total = allocated; @@ -1281,10 +1295,9 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) nomem_bulk: /* Clean up potential freed allocations on bulk failure */ memset(slots, 0, max_req * sizeof(unsigned long)); + mas->alloc->total = allocated; nomem_one: mas_set_alloc_req(mas, requested); - if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) - mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } @@ -1346,8 +1359,8 @@ static void mas_node_count(struct ma_state *mas, int count) * Return: * - If mas->node is an error or not 
mas_start, return NULL. * - If it's an empty tree: NULL & mas->status == ma_none - * - If it's a single entry: The entry & mas->status == mas_root - * - If it's a tree: NULL & mas->status == safe root node. + * - If it's a single entry: The entry & mas->status == ma_root + * - If it's a tree: NULL & mas->status == ma_active */ static inline struct maple_enode *mas_start(struct ma_state *mas) { @@ -1372,9 +1385,9 @@ retry: return NULL; } + mas->node = NULL; /* empty tree */ if (unlikely(!root)) { - mas->node = NULL; mas->status = ma_none; mas->offset = MAPLE_NODE_SLOTS; return NULL; @@ -1462,7 +1475,7 @@ static inline unsigned char mas_data_end(struct ma_state *mas) /* * mas_leaf_max_gap() - Returns the largest gap in a leaf node - * @mas - the maple state + * @mas: the maple state * * Return: The maximum gap in the leaf. */ @@ -1544,7 +1557,7 @@ static unsigned long mas_leaf_max_gap(struct ma_state *mas) * @node: The maple node * @gaps: The pointer to the gaps * @mt: The maple node type - * @*off: Pointer to store the offset location of the gap. + * @off: Pointer to store the offset location of the gap. * * Uses the metadata data end to scan backwards across set gaps. * @@ -1651,7 +1664,7 @@ ascend: /* * mas_update_gap() - Update a nodes gaps and propagate up if necessary. - * @mas - the maple state. + * @mas: the maple state. */ static inline void mas_update_gap(struct ma_state *mas) { @@ -1678,8 +1691,8 @@ static inline void mas_update_gap(struct ma_state *mas) /* * mas_adopt_children() - Set the parent pointer of all nodes in @parent to * @parent with the slot encoded. - * @mas - the maple state (for the tree) - * @parent - the maple encoded node containing the children. + * @mas: the maple state (for the tree) + * @parent: the maple encoded node containing the children. */ static inline void mas_adopt_children(struct ma_state *mas, struct maple_enode *parent) @@ -1701,8 +1714,8 @@ static inline void mas_adopt_children(struct ma_state *mas, /* * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old * node as dead. - * @mas - the maple state with the new node - * @old_enode - The old maple encoded node to replace. + * @mas: the maple state with the new node + * @old_enode: The old maple encoded node to replace. */ static inline void mas_put_in_tree(struct ma_state *mas, struct maple_enode *old_enode) @@ -1730,8 +1743,8 @@ static inline void mas_put_in_tree(struct ma_state *mas, * mas_replace_node() - Replace a node by putting it in the tree, marking it * dead, and freeing it. * the parent encoding to locate the maple node in the tree. - * @mas - the ma_state with @mas->node pointing to the new node. - * @old_enode - The old maple encoded node. + * @mas: the ma_state with @mas->node pointing to the new node. + * @old_enode: The old maple encoded node. */ static inline void mas_replace_node(struct ma_state *mas, struct maple_enode *old_enode) @@ -1796,7 +1809,6 @@ static inline void mab_shift_right(struct maple_big_node *b_node, /* * mab_middle_node() - Check if a middle node is needed (unlikely) * @b_node: the maple_big_node that contains the data. - * @size: the amount of data in the b_node * @split: the potential split location * @slot_count: the size that can be stored in a single node being considered. * @@ -1844,17 +1856,18 @@ static inline int mab_no_null_split(struct maple_big_node *b_node, /* * mab_calc_split() - Calculate the split location and if there needs to be two * splits. 
+ * @mas: The maple state * @bn: The maple_big_node with the data * @mid_split: The second split, if required. 0 otherwise. * * Return: The first split location. The middle split is set in @mid_split. */ static inline int mab_calc_split(struct ma_state *mas, - struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) + struct maple_big_node *bn, unsigned char *mid_split) { unsigned char b_end = bn->b_end; int split = b_end / 2; /* Assume equal split. */ - unsigned char slot_min, slot_count = mt_slots[bn->type]; + unsigned char slot_count = mt_slots[bn->type]; /* * To support gap tracking, all NULL entries are kept together and a node cannot @@ -1887,18 +1900,7 @@ static inline int mab_calc_split(struct ma_state *mas, split = b_end / 3; *mid_split = split * 2; } else { - slot_min = mt_min_slots[bn->type]; - *mid_split = 0; - /* - * Avoid having a range less than the slot count unless it - * causes one node to be deficient. - * NOTE: mt_min_slots is 1 based, b_end and split are zero. - */ - while ((split < slot_count - 1) && - ((bn->pivot[split] - min) < slot_count - 1) && - (b_end - split > slot_min)) - split++; } /* Avoid ending a node on a NULL entry */ @@ -1944,14 +1946,13 @@ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, for (; i < piv_end; i++, j++) { b_node->pivot[j] = pivots[i]; if (unlikely(!b_node->pivot[j])) - break; + goto complete; if (unlikely(mas->max == b_node->pivot[j])) goto complete; } - if (likely(i <= mas_end)) - b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); + b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); complete: b_node->b_end = ++j; @@ -2140,9 +2141,7 @@ static inline bool mas_prev_sibling(struct ma_state *mas) { unsigned int p_slot = mte_parent_slot(mas->node); - if (mte_is_root(mas->node)) - return false; - + /* For root node, p_slot is set to 0 by mte_parent_slot(). */ if (!p_slot) return false; @@ -2177,7 +2176,8 @@ static inline bool mas_next_sibling(struct ma_state *mas) } /* - * mte_node_or_none() - Set the enode and state. + * mas_node_or_none() - Set the enode and state. + * @mas: the maple state * @enode: The encoded maple node. * * Set the node to the enode and the status. @@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(struct ma_state *mas, /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. + * If @mas->index cannot be found within the containing + * node, we traverse to the last entry in the node. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. @@ -2228,7 +2230,6 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) /* * mast_rebalance_next() - Rebalance against the next node * @mast: The maple subtree state - * @old_r: The encoded maple node to the right (next node). 
*/ static inline void mast_rebalance_next(struct maple_subtree_state *mast) { @@ -2242,7 +2243,6 @@ static inline void mast_rebalance_next(struct maple_subtree_state *mast) /* * mast_rebalance_prev() - Rebalance against the previous node * @mast: The maple subtree state - * @old_l: The encoded maple node to the left (previous node) */ static inline void mast_rebalance_prev(struct maple_subtree_state *mast) { @@ -2366,7 +2366,7 @@ static inline struct maple_enode static inline unsigned char mas_mab_to_node(struct ma_state *mas, struct maple_big_node *b_node, struct maple_enode **left, struct maple_enode **right, struct maple_enode **middle, - unsigned char *mid_split, unsigned long min) + unsigned char *mid_split) { unsigned char split = 0; unsigned char slot_count = mt_slots[b_node->type]; @@ -2379,7 +2379,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas, if (b_node->b_end < slot_count) { split = b_node->b_end; } else { - split = mab_calc_split(mas, b_node, mid_split, min); + split = mab_calc_split(mas, b_node, mid_split); *right = mas_new_ma_node(mas, b_node); } @@ -2393,9 +2393,9 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas, /* * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end * pointer. - * @b_node - the big node to add the entry - * @mas - the maple state to get the pivot (mas->max) - * @entry - the entry to add, if NULL nothing happens. + * @b_node: the big node to add the entry + * @mas: the maple state to get the pivot (mas->max) + * @entry: the entry to add, if NULL nothing happens. */ static inline void mab_set_b_end(struct maple_big_node *b_node, struct ma_state *mas, @@ -2414,11 +2414,11 @@ static inline void mab_set_b_end(struct maple_big_node *b_node, * mas_set_split_parent() - combine_then_separate helper function. Sets the parent * of @mas->node to either @left or @right, depending on @slot and @split * - * @mas - the maple state with the node that needs a parent - * @left - possible parent 1 - * @right - possible parent 2 - * @slot - the slot the mas->node was placed - * @split - the split location between @left and @right + * @mas: the maple state with the node that needs a parent + * @left: possible parent 1 + * @right: possible parent 2 + * @slot: the slot the mas->node was placed + * @split: the split location between @left and @right */ static inline void mas_set_split_parent(struct ma_state *mas, struct maple_enode *left, @@ -2438,11 +2438,11 @@ static inline void mas_set_split_parent(struct ma_state *mas, /* * mte_mid_split_check() - Check if the next node passes the mid-split - * @**l: Pointer to left encoded maple node. - * @**m: Pointer to middle encoded maple node. - * @**r: Pointer to right encoded maple node. + * @l: Pointer to left encoded maple node. + * @m: Pointer to middle encoded maple node. + * @r: Pointer to right encoded maple node. * @slot: The offset - * @*split: The split location. + * @split: The split location. * @mid_split: The middle split. */ static inline void mte_mid_split_check(struct maple_enode **l, @@ -2466,10 +2466,10 @@ static inline void mte_mid_split_check(struct maple_enode **l, /* * mast_set_split_parents() - Helper function to set three nodes parents. Slot * is taken from @mast->l. - * @mast - the maple subtree state - * @left - the left node - * @right - the right node - * @split - the split location. + * @mast: the maple subtree state + * @left: the left node + * @right: the right node + * @split: the split location. 
*/ static inline void mast_set_split_parents(struct maple_subtree_state *mast, struct maple_enode *left, @@ -2503,7 +2503,6 @@ static inline void mast_set_split_parents(struct maple_subtree_state *mast, /* * mas_topiary_node() - Dispose of a single node * @mas: The maple state for pushing nodes - * @enode: The encoded maple node * @in_rcu: If the tree is in rcu mode * * The node will either be RCU freed or pushed back on the maple state. @@ -2635,7 +2634,7 @@ static inline void mas_topiary_replace(struct ma_state *mas, /* * mas_wmb_replace() - Write memory barrier and replace * @mas: The maple state - * @old: The old maple encoded node that is being replaced. + * @old_enode: The old maple encoded node that is being replaced. * * Updates gap as necessary. */ @@ -2823,10 +2822,8 @@ dead_node: * orig_l_mas->last is used in mas_consume to find the slots that will need to * be either freed or destroyed. orig_l_mas->depth keeps track of the height of * the new sub-tree in case the sub-tree becomes the full tree. - * - * Return: the number of elements in b_node during the last loop. */ -static int mas_spanning_rebalance(struct ma_state *mas, +static void mas_spanning_rebalance(struct ma_state *mas, struct maple_subtree_state *mast, unsigned char count) { unsigned char split, mid_split; @@ -2869,7 +2866,7 @@ static int mas_spanning_rebalance(struct ma_state *mas, mast->bn->b_end--; mast->bn->type = mte_node_type(mast->orig_l->node); split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, - &mid_split, mast->orig_l->min); + &mid_split); mast_set_split_parents(mast, left, middle, right, split, mid_split); mast_cp_to_nodes(mast, left, middle, right, split, mid_split); @@ -2942,7 +2939,7 @@ new_root: mas->offset = l_mas.offset; mas_wmb_replace(mas, old_enode); mtree_range_walk(mas); - return mast->bn->b_end; + return; } /* @@ -2952,10 +2949,8 @@ new_root: * * Rebalance two nodes into a single node or two new nodes that are sufficient. * Continue upwards until tree is sufficient. - * - * Return: the number of elements in b_node during the last loop. */ -static inline int mas_rebalance(struct ma_state *mas, +static inline void mas_rebalance(struct ma_state *mas, struct maple_big_node *b_node) { char empty_count = mas_mt_height(mas); @@ -2976,9 +2971,6 @@ static inline int mas_rebalance(struct ma_state *mas, * tries to combine the data in the same way. If one node contains the * entire range of the tree, then that node is used as a new root node. */ - mas_node_count(mas, empty_count * 2 - 1); - if (mas_is_err(mas)) - return 0; mast.orig_l = &l_mas; mast.orig_r = &r_mas; @@ -3029,11 +3021,6 @@ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end /* set up node. */ if (in_rcu) { - /* Allocate for both left and right as well as parent. 
*/ - mas_node_count(mas, 3); - if (mas_is_err(mas)) - return; - newnode = mas_pop_node(mas); } else { newnode = &reuse; @@ -3172,10 +3159,7 @@ static inline void mast_fill_bnode(struct maple_subtree_state *mast, bool cp = true; unsigned char split; - memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); - memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); - memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); - mast->bn->b_end = 0; + memset(mast->bn, 0, sizeof(struct maple_big_node)); if (mte_is_root(mas->node)) { cp = false; @@ -3308,9 +3292,8 @@ static inline bool mas_push_data(struct ma_state *mas, int height, * mas_split() - Split data that is too big for one node into two. * @mas: The maple state * @b_node: The maple big node - * Return: 1 on success, 0 on failure. */ -static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) +static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) { struct maple_subtree_state mast; int height = 0; @@ -3341,10 +3324,6 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) trace_ma_op(__func__, mas); mas->depth = mas_mt_height(mas); - /* Allocation failures will happen early. */ - mas_node_count(mas, 1 + mas->depth * 2); - if (mas_is_err(mas)) - return 0; mast.l = &l_mas; mast.r = &r_mas; @@ -3375,7 +3354,7 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) if (mas_push_data(mas, height, &mast, false)) break; - split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); + split = mab_calc_split(mas, b_node, &mid_split); mast_split_data(&mast, mas, split); /* * Usually correct, mab_mas_cp in the above call overwrites @@ -3392,75 +3371,25 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) mas->node = l_mas.node; mas_wmb_replace(mas, old); mtree_range_walk(mas); - return 1; -} - -/* - * mas_reuse_node() - Reuse the node to store the data. - * @wr_mas: The maple write state - * @bn: The maple big node - * @end: The end of the data. - * - * Will always return false in RCU mode. - * - * Return: True if node was reused, false otherwise. - */ -static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, - struct maple_big_node *bn, unsigned char end) -{ - /* Need to be rcu safe. */ - if (mt_in_rcu(wr_mas->mas->tree)) - return false; - - if (end > bn->b_end) { - int clear = mt_slots[wr_mas->type] - bn->b_end; - - memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); - memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); - } - mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); - return true; + return; } /* * mas_commit_b_node() - Commit the big node into the tree. * @wr_mas: The maple write state * @b_node: The maple big node - * @end: The end of the data. 
*/ -static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, - struct maple_big_node *b_node, unsigned char end) +static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, + struct maple_big_node *b_node) { - struct maple_node *node; - struct maple_enode *old_enode; - unsigned char b_end = b_node->b_end; - enum maple_type b_type = b_node->type; - - old_enode = wr_mas->mas->node; - if ((b_end < mt_min_slots[b_type]) && - (!mte_is_root(old_enode)) && - (mas_mt_height(wr_mas->mas) > 1)) - return mas_rebalance(wr_mas->mas, b_node); - - if (b_end >= mt_slots[b_type]) - return mas_split(wr_mas->mas, b_node); + enum store_type type = wr_mas->mas->store_type; - if (mas_reuse_node(wr_mas, b_node, end)) - goto reuse_node; + WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); - mas_node_count(wr_mas->mas, 1); - if (mas_is_err(wr_mas->mas)) - return 0; + if (type == wr_rebalance) + return mas_rebalance(wr_mas->mas, b_node); - node = mas_pop_node(wr_mas->mas); - node->parent = mas_mn(wr_mas->mas)->parent; - wr_mas->mas->node = mt_mk_node(node, b_type); - mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); - mas_replace_node(wr_mas->mas, old_enode); -reuse_node: - mas_update_gap(wr_mas->mas); - wr_mas->mas->end = b_end; - return 1; + return mas_split(wr_mas->mas, b_node); } /* @@ -3468,7 +3397,7 @@ reuse_node: * @mas: The maple state * @entry: The entry to store into the tree */ -static inline int mas_root_expand(struct ma_state *mas, void *entry) +static inline void mas_root_expand(struct ma_state *mas, void *entry) { void *contents = mas_root_locked(mas); enum maple_type type = maple_leaf_64; @@ -3477,10 +3406,6 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) unsigned long *pivots; int slot = 0; - mas_node_count(mas, 1); - if (unlikely(mas_is_err(mas))) - return 0; - node = mas_pop_node(mas); pivots = ma_pivots(node, type); slots = ma_slots(node, type); @@ -3508,12 +3433,23 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); - return slot; + return; } +/* + * mas_store_root() - Storing value into root. + * @mas: The maple state + * @entry: The entry to store. + * + * There is no root node now and we are storing a value into the root - this + * function either assigns the pointer or expands into a node. + */ static inline void mas_store_root(struct ma_state *mas, void *entry) { - if (likely((mas->last != 0) || (mas->index != 0))) + if (!entry) { + if (!mas->index) + rcu_assign_pointer(mas->tree->ma_root, NULL); + } else if (likely((mas->last != 0) || (mas->index != 0))) mas_root_expand(mas, entry); else if (((unsigned long) (entry) & 3) == 2) mas_root_expand(mas, entry); @@ -3526,10 +3462,7 @@ static inline void mas_store_root(struct ma_state *mas, void *entry) /* * mas_is_span_wr() - Check if the write needs to be treated as a write that * spans the node. - * @mas: The maple state - * @piv: The pivot value being written - * @type: The maple node type - * @entry: The data to write + * @wr_mas: The maple write state * * Spanning writes are writes that start in one node and end in another OR if * the write of a %NULL will cause the node to end with a %NULL. 
@@ -3609,7 +3542,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas) return true; } -static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) +static void mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -3618,11 +3551,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) - return true; + return; mas_wr_walk_traverse(wr_mas); - } - return true; } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. @@ -3730,10 +3661,8 @@ static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); * @entry: The entry to store. * * Only valid when the index == 0 and the last == ULONG_MAX - * - * Return 0 on error, 1 on success. */ -static inline int mas_new_root(struct ma_state *mas, void *entry) +static inline void mas_new_root(struct ma_state *mas, void *entry) { struct maple_enode *root = mas_root_locked(mas); enum maple_type type = maple_leaf_64; @@ -3741,7 +3670,9 @@ static inline int mas_new_root(struct ma_state *mas, void *entry) void __rcu **slots; unsigned long *pivots; - if (!entry && !mas->index && mas->last == ULONG_MAX) { + WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX); + + if (!entry) { mas->depth = 0; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, entry); @@ -3749,10 +3680,6 @@ static inline int mas_new_root(struct ma_state *mas, void *entry) goto done; } - mas_node_count(mas, 1); - if (mas_is_err(mas)) - return 0; - node = mas_pop_node(mas); pivots = ma_pivots(node, type); slots = ma_slots(node, type); @@ -3769,7 +3696,7 @@ done: if (xa_is_node(root)) mte_destroy_walk(root, mas->tree); - return 1; + return; } /* * mas_wr_spanning_store() - Create a subtree with the store operation completed @@ -3777,10 +3704,8 @@ done: * Note that mas is expected to point to the node which caused the store to * span. * @wr_mas: The maple write state - * - * Return: 0 on error, positive on success. */ -static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) +static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) { struct maple_subtree_state mast; struct maple_big_node b_node; @@ -3815,9 +3740,6 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) * entries per level plus a new root. */ height = mas_mt_height(mas); - mas_node_count(mas, 1 + height * 3); - if (mas_is_err(mas)) - return 0; /* * Set up right side. Need to get to the next offset after the spanning @@ -3853,8 +3775,8 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) memset(&b_node, 0, sizeof(struct maple_big_node)); /* Copy l_mas and store the value in b_node. */ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); - /* Copy r_mas into b_node. */ - if (r_mas.offset <= r_mas.end) + /* Copy r_mas into b_node if there is anything to copy. */ + if (r_mas.max > r_mas.last) mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, &b_node, b_node.b_end + 1); else @@ -3875,10 +3797,8 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) * @wr_mas: The maple write state * * Attempts to reuse the node, but may allocate. 
- * - * Return: True if stored, false otherwise */ -static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, +static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, unsigned char new_end) { struct ma_state *mas = wr_mas->mas; @@ -3889,11 +3809,6 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; bool in_rcu = mt_in_rcu(mas->tree); - /* Check if there is enough data. The room is enough. */ - if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && - !(mas->mas_flags & MA_STATE_BULK)) - return false; - if (mas->last == wr_mas->end_piv) offset_end++; /* don't copy this offset */ else if (unlikely(wr_mas->r_max == ULONG_MAX)) @@ -3901,10 +3816,6 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, /* set up node. */ if (in_rcu) { - mas_node_count(mas, 1); - if (mas_is_err(mas)) - return false; - newnode = mas_pop_node(mas); } else { memset(&reuse, 0, sizeof(struct maple_node)); @@ -3960,16 +3871,14 @@ done: trace_ma_write(__func__, mas, 0, wr_mas->entry); mas_update_gap(mas); mas->end = new_end; - return true; + return; } /* * mas_wr_slot_store: Attempt to store a value in a slot. * @wr_mas: the maple write state - * - * Return: True if stored, false otherwise */ -static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) +static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char offset = mas->offset; @@ -3990,7 +3899,8 @@ static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; mas->offset++; /* Keep mas accurate. */ } - } else if (!mt_in_rcu(mas->tree)) { + } else { + WARN_ON_ONCE(mt_in_rcu(mas->tree)); /* * Expand the range, only partially overwriting the previous and * next ranges @@ -4000,8 +3910,6 @@ static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) wr_mas->pivots[offset] = mas->index - 1; wr_mas->pivots[offset + 1] = mas->last; mas->offset++; /* Keep mas accurate. */ - } else { - return false; } trace_ma_write(__func__, mas, 0, wr_mas->entry); @@ -4012,7 +3920,7 @@ static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) if (!wr_mas->entry || gap) mas_update_gap(mas); - return true; + return; } static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) @@ -4061,9 +3969,6 @@ static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; else wr_mas->end_piv = wr_mas->mas->max; - - if (!wr_mas->entry) - mas_wr_extend_null(wr_mas); } static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) @@ -4089,23 +3994,13 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) * This is currently unsafe in rcu mode since the end of the node may be cached * by readers while the node contents may be updated which could result in * inaccurate information. 
- * - * Return: True if appended, false otherwise */ -static inline bool mas_wr_append(struct ma_wr_state *wr_mas, +static inline void mas_wr_append(struct ma_wr_state *wr_mas, unsigned char new_end) { - struct ma_state *mas; + struct ma_state *mas = wr_mas->mas; void __rcu **slots; - unsigned char end; - - mas = wr_mas->mas; - if (mt_in_rcu(mas->tree)) - return false; - - end = mas->end; - if (mas->offset != end) - return false; + unsigned char end = mas->end; if (new_end < mt_pivots[wr_mas->type]) { wr_mas->pivots[new_end] = wr_mas->pivots[end]; @@ -4139,7 +4034,7 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas, mas->end = new_end; trace_ma_write(__func__, mas, new_end, wr_mas->entry); - return true; + return; } /* @@ -4155,79 +4050,213 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas) trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); memset(&b_node, 0, sizeof(struct maple_big_node)); mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); - mas_commit_b_node(wr_mas, &b_node, wr_mas->mas->end); + mas_commit_b_node(wr_mas, &b_node); } -static inline void mas_wr_modify(struct ma_wr_state *wr_mas) +/* + * mas_wr_store_entry() - Internal call to store a value + * @wr_mas: The maple write state + */ +static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; - unsigned char new_end; + unsigned char new_end = mas_wr_new_end(wr_mas); - /* Direct replacement */ - if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { + switch (mas->store_type) { + case wr_invalid: + MT_BUG_ON(mas->tree, 1); + return; + case wr_new_root: + mas_new_root(mas, wr_mas->entry); + break; + case wr_store_root: + mas_store_root(mas, wr_mas->entry); + break; + case wr_exact_fit: rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); if (!!wr_mas->entry ^ !!wr_mas->content) mas_update_gap(mas); - return; + break; + case wr_append: + mas_wr_append(wr_mas, new_end); + break; + case wr_slot_store: + mas_wr_slot_store(wr_mas); + break; + case wr_node_store: + mas_wr_node_store(wr_mas, new_end); + break; + case wr_spanning_store: + mas_wr_spanning_store(wr_mas); + break; + case wr_split_store: + case wr_rebalance: + mas_wr_bnode(wr_mas); + break; + } + + return; +} + +static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + + if (!mas_is_active(mas)) { + if (mas_is_start(mas)) + goto set_content; + + if (unlikely(mas_is_paused(mas))) + goto reset; + + if (unlikely(mas_is_none(mas))) + goto reset; + + if (unlikely(mas_is_overflow(mas))) + goto reset; + + if (unlikely(mas_is_underflow(mas))) + goto reset; } /* - * new_end exceeds the size of the maple node and cannot enter the fast - * path. + * A less strict version of mas_is_span_wr() where we allow spanning + * writes within this node. This is to stop partial walks in + * mas_prealloc() from being reset. 
*/ - new_end = mas_wr_new_end(wr_mas); - if (new_end >= mt_slots[wr_mas->type]) - goto slow_path; + if (mas->last > mas->max) + goto reset; - /* Attempt to append */ - if (mas_wr_append(wr_mas, new_end)) - return; + if (wr_mas->entry) + goto set_content; - if (new_end == mas->end && mas_wr_slot_store(wr_mas)) - return; + if (mte_is_leaf(mas->node) && mas->last == mas->max) + goto reset; - if (mas_wr_node_store(wr_mas, new_end)) - return; + goto set_content; - if (mas_is_err(mas)) - return; +reset: + mas_reset(mas); +set_content: + wr_mas->content = mas_start(mas); +} -slow_path: - mas_wr_bnode(wr_mas); +/** + * mas_prealloc_calc() - Calculate number of nodes needed for a + * given store operation + * @mas: The maple state + * @entry: The entry to store into the tree + * + * Return: Number of nodes required for preallocation. + */ +static inline int mas_prealloc_calc(struct ma_state *mas, void *entry) +{ + int ret = mas_mt_height(mas) * 3 + 1; + + switch (mas->store_type) { + case wr_invalid: + WARN_ON_ONCE(1); + break; + case wr_new_root: + ret = 1; + break; + case wr_store_root: + if (likely((mas->last != 0) || (mas->index != 0))) + ret = 1; + else if (((unsigned long) (entry) & 3) == 2) + ret = 1; + else + ret = 0; + break; + case wr_spanning_store: + ret = mas_mt_height(mas) * 3 + 1; + break; + case wr_split_store: + ret = mas_mt_height(mas) * 2 + 1; + break; + case wr_rebalance: + ret = mas_mt_height(mas) * 2 - 1; + break; + case wr_node_store: + ret = mt_in_rcu(mas->tree) ? 1 : 0; + break; + case wr_append: + case wr_exact_fit: + case wr_slot_store: + ret = 0; + } + + return ret; +} /* - * mas_wr_store_entry() - Internal call to store a value - * @mas: The maple state - * @entry: The entry to store. + * mas_wr_store_type() - Determine the store type for a given + * store operation. + * @wr_mas: The maple write state * - * Return: The contents that was stored at the index. + * Return: the type of store needed for the operation */ -static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) +static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; + unsigned char new_end; - wr_mas->content = mas_start(mas); - if (mas_is_none(mas) || mas_is_ptr(mas)) { - mas_store_root(mas, wr_mas->entry); - return wr_mas->content; - } + if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) + return wr_store_root; - if (unlikely(!mas_wr_walk(wr_mas))) + return wr_spanning_store; /* At this point, we are at the leaf node that needs to be altered.
*/ mas_wr_end_piv(wr_mas); - /* New root for a single pointer */ - if (unlikely(!mas->index && mas->last == ULONG_MAX)) { - mas_new_root(mas, wr_mas->entry); - return wr_mas->content; + if (!wr_mas->entry) + mas_wr_extend_null(wr_mas); + + if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) + return wr_exact_fit; + + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return wr_new_root; + + new_end = mas_wr_new_end(wr_mas); + /* Potential spanning rebalance collapsing a node */ + if (new_end < mt_min_slots[wr_mas->type]) { + if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) + return wr_rebalance; + return wr_node_store; } - mas_wr_modify(wr_mas); - return wr_mas->content; + if (new_end >= mt_slots[wr_mas->type]) + return wr_split_store; + + if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) + return wr_append; + + if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) || + (wr_mas->offset_end - mas->offset == 1))) + return wr_slot_store; + + return wr_node_store; +} + +/** + * mas_wr_preallocate() - Preallocate enough nodes for a store operation + * @wr_mas: The maple write state + * @entry: The entry that will be stored + * + */ +static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) +{ + struct ma_state *mas = wr_mas->mas; + int request; + + mas_wr_prealloc_setup(wr_mas); + mas->store_type = mas_wr_store_type(wr_mas); + request = mas_prealloc_calc(mas, entry); + if (!request) + return; + + mas_node_count(mas, request); } /** @@ -4260,26 +4289,24 @@ static inline void *mas_insert(struct ma_state *mas, void *entry) if (wr_mas.content) goto exists; - if (mas_is_none(mas) || mas_is_ptr(mas)) { - mas_store_root(mas, entry); + mas_wr_preallocate(&wr_mas, entry); + if (mas_is_err(mas)) return NULL; - } /* spanning writes always overwrite something */ - if (!mas_wr_walk(&wr_mas)) + if (mas->store_type == wr_spanning_store) goto exists; /* At this point, we are at the leaf node that needs to be altered. */ - wr_mas.offset_end = mas->offset; - wr_mas.end_piv = wr_mas.r_max; - - if (wr_mas.content || (mas->last > wr_mas.r_max)) - goto exists; + if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) { + wr_mas.offset_end = mas->offset; + wr_mas.end_piv = wr_mas.r_max; - if (!entry) - return NULL; + if (wr_mas.content || (mas->last > wr_mas.r_max)) + goto exists; + } - mas_wr_modify(&wr_mas); + mas_wr_store_entry(&wr_mas); return wr_mas.content; exists: @@ -4316,6 +4343,7 @@ int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, ret = 1; } if (ret < 0 && range_lo > min) { + mas_reset(mas); ret = mas_empty_area(mas, min, range_hi, 1); if (ret == 0) ret = 1; @@ -4334,6 +4362,7 @@ int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, if (*next == 0) mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; + mas_destroy(mas); return ret; } EXPORT_SYMBOL(mas_alloc_cyclic); @@ -4443,9 +4472,8 @@ no_entry: * mas_prev_slot() - Get the entry in the previous slot * * @mas: The maple state - * @max: The minimum starting range + * @min: The minimum starting range * @empty: Can be empty - * @set_underflow: Set the @mas->node to underflow state on limit. * * Return: The entry in the previous slot which is possibly NULL */ @@ -4528,6 +4556,7 @@ underflow: /* * mas_next_node() - Get the next node at the same level in the tree. * @mas: The maple state + * @node: The maple node * @max: The maximum pivot value to check. 
* * The next value will be mas->node[mas->offset] or the status will have @@ -4618,8 +4647,6 @@ overflow: * @mas: The maple state * @max: The maximum starting range * @empty: Can be empty - * @set_overflow: Should @mas->node be set to overflow when the limit is - * reached. * * Return: The entry in the next slot which is possibly NULL */ @@ -4708,29 +4735,6 @@ again: } /* - * mas_next_entry() - Internal function to get the next entry. - * @mas: The maple state - * @limit: The maximum range start. - * - * Set the @mas->node to the next entry and the range_start to - * the beginning value for the entry. Does not check beyond @limit. - * Sets @mas->index and @mas->last to the range, Does not update @mas->index and - * @mas->last on overflow. - * Restarts on dead nodes. - * - * Return: the next entry or %NULL. - */ -static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) -{ - if (mas->last >= limit) { - mas->status = ma_overflow; - return NULL; - } - - return mas_next_slot(mas, limit, false); -} - -/* * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the * highest gap address of a given size in a given node and descend. * @mas: The maple state @@ -4865,15 +4869,14 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) if (gap >= size) { if (ma_is_leaf(type)) { found = true; - goto done; - } - if (mas->index <= pivot) { - mas->node = mas_slot(mas, slots, offset); - mas->min = min; - mas->max = pivot; - offset = 0; break; } + + mas->node = mas_slot(mas, slots, offset); + mas->min = min; + mas->max = pivot; + offset = 0; + break; } next_slot: min = pivot + 1; @@ -4883,9 +4886,6 @@ next_slot: } } - if (mte_is_root(mas->node)) - found = true; -done: mas->offset = offset; return found; } @@ -4989,8 +4989,8 @@ static inline void mas_awalk(struct ma_state *mas, unsigned long size) * There are 4 options: * go to child (descend) * go back to parent (ascend) - * no gap found. (return, slot == MAPLE_NODE_SLOTS) - * found the gap. (return, slot != MAPLE_NODE_SLOTS) + * no gap found. (return, error == -EBUSY) + * found the gap. (return) */ while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { if (last == mas->node) @@ -5075,9 +5075,6 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, return xa_err(mas->node); offset = mas->offset; - if (unlikely(offset == MAPLE_NODE_SLOTS)) - return -EBUSY; - node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); @@ -5153,9 +5150,9 @@ EXPORT_SYMBOL_GPL(mas_empty_area_rev); /* * mte_dead_leaves() - Mark all leaves of a node as dead. - * @mas: The maple state + * @enode: the encoded node + * @mt: the maple tree * @slots: Pointer to the slot array - * @type: The maple node type * * Must hold the write lock. * @@ -5361,47 +5358,6 @@ static inline void mte_destroy_walk(struct maple_enode *enode, mt_destroy_walk(enode, mt, true); } } - -static void mas_wr_store_setup(struct ma_wr_state *wr_mas) -{ - if (!mas_is_active(wr_mas->mas)) { - if (mas_is_start(wr_mas->mas)) - return; - - if (unlikely(mas_is_paused(wr_mas->mas))) - goto reset; - - if (unlikely(mas_is_none(wr_mas->mas))) - goto reset; - - if (unlikely(mas_is_overflow(wr_mas->mas))) - goto reset; - - if (unlikely(mas_is_underflow(wr_mas->mas))) - goto reset; - } - - /* - * A less strict version of mas_is_span_wr() where we allow spanning - * writes within this node. This is to stop partial walks in - * mas_prealloc() from being reset. 
- */ - if (wr_mas->mas->last > wr_mas->mas->max) - goto reset; - - if (wr_mas->entry) - return; - - if (mte_is_leaf(wr_mas->mas->node) && - wr_mas->mas->last == wr_mas->mas->max) - goto reset; - - return; - -reset: - mas_reset(wr_mas->mas); -} - /* Interface */ /** @@ -5410,19 +5366,19 @@ reset: * @entry: The entry to store. * * The @mas->index and @mas->last is used to set the range for the @entry. - * Note: The @mas should have pre-allocated entries to ensure there is memory to - * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. * * Return: the first entry between mas->index and mas->last or %NULL. */ void *mas_store(struct ma_state *mas, void *entry) { + int request; MA_WR_STATE(wr_mas, mas, entry); trace_ma_write(__func__, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) - pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); + pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, + entry); if (mas->index > mas->last) { mas_set_err(mas, -EINVAL); @@ -5437,8 +5393,25 @@ void *mas_store(struct ma_state *mas, void *entry) * want to examine what happens if a single store operation was to * overwrite multiple entries within a self-balancing B-Tree. */ - mas_wr_store_setup(&wr_mas); + mas_wr_prealloc_setup(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); + if (mas->mas_flags & MA_STATE_PREALLOC) { + mas_wr_store_entry(&wr_mas); + MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); + return wr_mas.content; + } + + request = mas_prealloc_calc(mas, entry); + if (!request) + goto store; + + mas_node_count(mas, request); + if (mas_is_err(mas)) + return NULL; + +store: mas_wr_store_entry(&wr_mas); + mas_destroy(mas); return wr_mas.content; } EXPORT_SYMBOL_GPL(mas_store); @@ -5454,19 +5427,28 @@ EXPORT_SYMBOL_GPL(mas_store); */ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) { + unsigned long index = mas->index; + unsigned long last = mas->last; MA_WR_STATE(wr_mas, mas, entry); + int ret = 0; - mas_wr_store_setup(&wr_mas); - trace_ma_write(__func__, mas, 0, entry); retry: - mas_wr_store_entry(&wr_mas); - if (unlikely(mas_nomem(mas, gfp))) + mas_wr_preallocate(&wr_mas, entry); + if (unlikely(mas_nomem(mas, gfp))) { + if (!entry) + __mas_set_range(mas, index, last); goto retry; + } - if (unlikely(mas_is_err(mas))) - return xa_err(mas->node); + if (mas_is_err(mas)) { + ret = xa_err(mas->node); + goto out; + } - return 0; + mas_wr_store_entry(&wr_mas); +out: + mas_destroy(mas); + return ret; } EXPORT_SYMBOL_GPL(mas_store_gfp); @@ -5480,7 +5462,19 @@ void mas_store_prealloc(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); - mas_wr_store_setup(&wr_mas); + if (mas->store_type == wr_store_root) { + mas_wr_prealloc_setup(&wr_mas); + goto store; + } + + mas_wr_walk_descend(&wr_mas); + if (mas->store_type != wr_spanning_store) { + /* set wr_mas->content to current slot */ + wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset); + mas_wr_end_piv(&wr_mas); + } + +store: trace_ma_write(__func__, mas, 0, entry); mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); @@ -5499,70 +5493,25 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) { MA_WR_STATE(wr_mas, mas, entry); - unsigned char node_size; - int request = 1; - int ret; - - - if (unlikely(!mas->index && mas->last == ULONG_MAX)) - goto ask_now; - - mas_wr_store_setup(&wr_mas); - wr_mas.content = mas_start(mas); - /* Root expand */ - if 
(unlikely(mas_is_none(mas) || mas_is_ptr(mas))) - goto ask_now; - - if (unlikely(!mas_wr_walk(&wr_mas))) { - /* Spanning store, use worst case for now */ - request = 1 + mas_mt_height(mas) * 3; - goto ask_now; - } - - /* At this point, we are at the leaf node that needs to be altered. */ - /* Exact fit, no nodes needed. */ - if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) - return 0; - - mas_wr_end_piv(&wr_mas); - node_size = mas_wr_new_end(&wr_mas); + int ret = 0; + int request; - /* Slot store, does not require additional nodes */ - if (node_size == mas->end) { - /* reuse node */ - if (!mt_in_rcu(mas->tree)) - return 0; - /* shifting boundary */ - if (wr_mas.offset_end - mas->offset == 1) - return 0; - } + mas_wr_prealloc_setup(&wr_mas); + mas->store_type = mas_wr_store_type(&wr_mas); + request = mas_prealloc_calc(mas, entry); + if (!request) + return ret; - if (node_size >= mt_slots[wr_mas.type]) { - /* Split, worst case for now. */ - request = 1 + mas_mt_height(mas) * 2; - goto ask_now; + mas_node_count_gfp(mas, request, gfp); + if (mas_is_err(mas)) { + mas_set_alloc_req(mas, 0); + ret = xa_err(mas->node); + mas_destroy(mas); + mas_reset(mas); + return ret; } - /* New root needs a single node */ - if (unlikely(mte_is_root(mas->node))) - goto ask_now; - - /* Potential spanning rebalance collapsing a node, use worst-case */ - if (node_size - 1 <= mt_min_slots[wr_mas.type]) - request = mas_mt_height(mas) * 2 - 1; - - /* node store, slot store needs one node */ -ask_now: - mas_node_count_gfp(mas, request, gfp); mas->mas_flags |= MA_STATE_PREALLOC; - if (likely(!mas_is_err(mas))) - return 0; - - mas_set_alloc_req(mas, 0); - ret = xa_err(mas->node); - mas_reset(mas); - mas_destroy(mas); - mas_reset(mas); return ret; } EXPORT_SYMBOL_GPL(mas_preallocate); @@ -5588,7 +5537,8 @@ void mas_destroy(struct ma_state *mas) */ if (mas->mas_flags & MA_STATE_REBALANCE) { unsigned char end; - + if (mas_is_err(mas)) + mas_reset(mas); mas_start(mas); mtree_range_walk(mas); end = mas->end + 1; @@ -6248,24 +6198,32 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev); void *mas_erase(struct ma_state *mas) { void *entry; + unsigned long index = mas->index; MA_WR_STATE(wr_mas, mas, NULL); if (!mas_is_active(mas) || !mas_is_start(mas)) mas->status = ma_start; - /* Retry unnecessary when holding the write lock. 
*/ +write_retry: entry = mas_state_walk(mas); if (!entry) return NULL; -write_retry: /* Must reset to ensure spanning writes of last slot are detected */ mas_reset(mas); - mas_wr_store_setup(&wr_mas); - mas_wr_store_entry(&wr_mas); - if (mas_nomem(mas, GFP_KERNEL)) + mas_wr_preallocate(&wr_mas, NULL); + if (mas_nomem(mas, GFP_KERNEL)) { + /* in case the range of entry changed when unlocked */ + mas->index = mas->last = index; goto write_retry; + } + + if (mas_is_err(mas)) + goto out; + mas_wr_store_entry(&wr_mas); +out: + mas_destroy(mas); return entry; } EXPORT_SYMBOL_GPL(mas_erase); @@ -6280,10 +6238,8 @@ EXPORT_SYMBOL_GPL(mas_erase); bool mas_nomem(struct ma_state *mas, gfp_t gfp) __must_hold(mas->tree->ma_lock) { - if (likely(mas->node != MA_ERROR(-ENOMEM))) { - mas_destroy(mas); + if (likely(mas->node != MA_ERROR(-ENOMEM))) return false; - } if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { mtree_unlock(mas->tree); @@ -6360,7 +6316,7 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index, unsigned long last, void *entry, gfp_t gfp) { MA_STATE(mas, mt, index, last); - MA_WR_STATE(wr_mas, &mas, entry); + int ret = 0; trace_ma_write(__func__, &mas, 0, entry); if (WARN_ON_ONCE(xa_is_advanced(entry))) @@ -6370,16 +6326,10 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index, return -EINVAL; mtree_lock(mt); -retry: - mas_wr_store_entry(&wr_mas); - if (mas_nomem(&mas, gfp)) - goto retry; - + ret = mas_store_gfp(&mas, entry, gfp); mtree_unlock(mt); - if (mas_is_err(&mas)) - return xa_err(mas.node); - return 0; + return ret; } EXPORT_SYMBOL(mtree_store_range); @@ -6415,6 +6365,7 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp) { MA_STATE(ms, mt, first, last); + int ret = 0; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -6430,9 +6381,10 @@ retry: mtree_unlock(mt); if (mas_is_err(&ms)) - return xa_err(ms.node); + ret = xa_err(ms.node); - return 0; + mas_destroy(&ms); + return ret; } EXPORT_SYMBOL(mtree_insert_range); @@ -6487,6 +6439,7 @@ retry: unlock: mtree_unlock(mt); + mas_destroy(&mas); return ret; } EXPORT_SYMBOL(mtree_alloc_range); @@ -6568,6 +6521,7 @@ retry: unlock: mtree_unlock(mt); + mas_destroy(&mas); return ret; } EXPORT_SYMBOL(mtree_alloc_rrange); @@ -6943,7 +6897,7 @@ retry: goto unlock; while (mas_is_active(&mas) && (mas.last < max)) { - entry = mas_next_entry(&mas, max); + entry = mas_next_slot(&mas, max, false); if (likely(entry && !xa_is_zero(entry))) break; } @@ -7000,6 +6954,19 @@ void mt_set_non_kernel(unsigned int val) kmem_cache_set_non_kernel(maple_node_cache, val); } +extern void kmem_cache_set_callback(struct kmem_cache *cachep, + void (*callback)(void *)); +void mt_set_callback(void (*callback)(void *)) +{ + kmem_cache_set_callback(maple_node_cache, callback); +} + +extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private); +void mt_set_private(void *private) +{ + kmem_cache_set_private(maple_node_cache, private); +} + extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); unsigned long mt_get_alloc_size(void) { @@ -7116,14 +7083,14 @@ static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, mt_dump_range(min, max, depth, format); if (xa_is_value(entry)) - pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), - xa_to_value(entry), entry); + pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry), + xa_to_value(entry), entry); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", 
xa_to_internal(entry)); else if (mt_is_reserved(entry)) - pr_cont("UNKNOWN ENTRY (%p)\n", entry); + pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry); else - pr_cont("%p\n", entry); + pr_cont(PTR_FMT "\n", entry); } static void mt_dump_range64(const struct maple_tree *mt, void *entry, @@ -7139,13 +7106,13 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { switch(format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { unsigned long last = max; @@ -7167,11 +7134,11 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (last > max) { switch(format) { case mt_dump_hex: - pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } @@ -7184,7 +7151,6 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, enum mt_dump_format format) { struct maple_arange_64 *node = &mte_to_node(entry)->ma64; - bool leaf = mte_is_leaf(entry); unsigned long first = min; int i; @@ -7202,13 +7168,13 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) { switch (format) { case mt_dump_hex: - pr_cont("%p %lX ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: - pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]); } } - pr_cont("%p\n", node->slot[i]); + pr_cont(PTR_FMT "\n", node->slot[i]); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { unsigned long last = max; @@ -7218,19 +7184,22 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry, break; if (last == 0 && i > 0) break; - if (leaf) - mt_dump_entry(mt_slot(mt, node->slot, i), - first, last, depth + 1, format); - else if (node->slot[i]) + if (node->slot[i]) mt_dump_node(mt, mt_slot(mt, node->slot, i), first, last, depth + 1, format); if (last == max) break; if (last > max) { - pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + switch(format) { + case mt_dump_hex: + pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); - break; + break; + case mt_dump_dec: + pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", + node, last, max, i); + } } first = last + 1; } @@ -7246,8 +7215,8 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry, mt_dump_range(min, max, depth, format); - pr_cont("node %p depth %d type %d parent %p", node, depth, type, - node ? node->parent : NULL); + pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node, + depth, type, node ? 
node->parent : NULL); switch (type) { case maple_dense: pr_cont("\n"); @@ -7275,12 +7244,14 @@ void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) { void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); - pr_info("maple_tree(%p) flags %X, height %u root %p\n", + pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n", mt, mt->ma_flags, mt_height(mt), entry); - if (!xa_is_node(entry)) - mt_dump_entry(entry, 0, 0, 0, format); - else if (entry) + if (xa_is_node(entry)) mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); + else if (entry) + mt_dump_entry(entry, 0, 0, 0, format); + else + pr_info("(empty)\n"); } EXPORT_SYMBOL_GPL(mt_dump); @@ -7327,7 +7298,7 @@ static void mas_validate_gaps(struct ma_state *mas) MT_BUG_ON(mas->tree, !entry); if (gap > p_end - p_start + 1) { - pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", + pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n", mas_mn(mas), i, gap, p_end, p_start, p_end - p_start + 1); MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); @@ -7347,19 +7318,19 @@ counted: MT_BUG_ON(mas->tree, !gaps); offset = ma_meta_gap(node); if (offset > i) { - pr_err("gap offset %p[%u] is invalid\n", node, offset); + pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset); MT_BUG_ON(mas->tree, 1); } if (gaps[offset] != max_gap) { - pr_err("gap %p[%u] is not the largest gap %lu\n", + pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n", node, offset, max_gap); MT_BUG_ON(mas->tree, 1); } for (i++ ; i < mt_slot_count(mte); i++) { if (gaps[i] != 0) { - pr_err("gap %p[%u] beyond node limit != 0\n", + pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n", node, i); MT_BUG_ON(mas->tree, 1); } @@ -7373,7 +7344,7 @@ counted: p_mn = mte_parent(mte); MT_BUG_ON(mas->tree, max_gap > mas->max); if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { - pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); + pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap); mt_dump(mas->tree, mt_dump_hex); MT_BUG_ON(mas->tree, 1); } @@ -7403,11 +7374,11 @@ static void mas_validate_parent_slot(struct ma_state *mas) node = mas_slot(mas, slots, i); if (i == p_slot) { if (node != mas->node) - pr_err("parent %p[%u] does not have %p\n", + pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n", parent, i, mas_mn(mas)); MT_BUG_ON(mas->tree, node != mas->node); } else if (node == mas->node) { - pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", + pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n", mas_mn(mas), parent, i, p_slot); MT_BUG_ON(mas->tree, node == mas->node); } @@ -7429,20 +7400,20 @@ static void mas_validate_child_slot(struct ma_state *mas) child = mas_slot(mas, slots, i); if (!child) { - pr_err("Non-leaf node lacks child at %p[%u]\n", + pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n", mas_mn(mas), i); MT_BUG_ON(mas->tree, 1); } if (mte_parent_slot(child) != i) { - pr_err("Slot error at %p[%u]: child %p has pslot %u\n", + pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n", mas_mn(mas), i, mte_to_node(child), mte_parent_slot(child)); MT_BUG_ON(mas->tree, 1); } if (mte_parent(child) != mte_to_node(mas->node)) { - pr_err("child %p has parent %p not %p\n", + pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n", mte_to_node(child), mte_parent(child), mte_to_node(mas->node)); MT_BUG_ON(mas->tree, 1); @@ -7472,24 +7443,24 @@ static void mas_validate_limits(struct ma_state *mas) piv = mas_safe_pivot(mas, pivots, i, type); if (!piv && (i != 
0)) { - pr_err("Missing node limit pivot at %p[%u]", + pr_err("Missing node limit pivot at " PTR_FMT "[%u]", mas_mn(mas), i); MAS_WARN_ON(mas, 1); } if (prev_piv > piv) { - pr_err("%p[%u] piv %lu < prev_piv %lu\n", + pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n", mas_mn(mas), i, piv, prev_piv); MAS_WARN_ON(mas, piv < prev_piv); } if (piv < mas->min) { - pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i, piv, mas->min); MAS_WARN_ON(mas, piv < mas->min); } if (piv > mas->max) { - pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, + pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i, piv, mas->max); MAS_WARN_ON(mas, piv > mas->max); } @@ -7499,7 +7470,7 @@ static void mas_validate_limits(struct ma_state *mas) } if (mas_data_end(mas) != i) { - pr_err("node%p: data_end %u != the last slot offset %u\n", + pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n", mas_mn(mas), mas_data_end(mas), i); MT_BUG_ON(mas->tree, 1); } @@ -7508,8 +7479,8 @@ static void mas_validate_limits(struct ma_state *mas) void *entry = mas_slot(mas, slots, i); if (entry && (i != mt_slots[type] - 1)) { - pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), - i, entry); + pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n", + mas_mn(mas), i, entry); MT_BUG_ON(mas->tree, entry != NULL); } @@ -7519,7 +7490,7 @@ static void mas_validate_limits(struct ma_state *mas) if (!piv) continue; - pr_err("%p[%u] should not have piv %lu\n", + pr_err(PTR_FMT "[%u] should not have piv %lu\n", mas_mn(mas), i, piv); MAS_WARN_ON(mas, i < mt_pivots[type] - 1); } @@ -7544,7 +7515,7 @@ static void mt_validate_nulls(struct maple_tree *mt) do { entry = mas_slot(&mas, slots, offset); if (!last && !entry) { - pr_err("Sequential nulls end at %p[%u]\n", + pr_err("Sequential nulls end at " PTR_FMT "[%u]\n", mas_mn(&mas), offset); } MT_BUG_ON(mt, !last && !entry); @@ -7569,14 +7540,14 @@ static void mt_validate_nulls(struct maple_tree *mt) * 2. 
The gap is correctly set in the parents */ void mt_validate(struct maple_tree *mt) + __must_hold(mas->tree->ma_lock) { unsigned char end; MA_STATE(mas, mt, 0, 0); - rcu_read_lock(); mas_start(&mas); if (!mas_is_active(&mas)) - goto done; + return; while (!mte_is_leaf(mas.node)) mas_descend(&mas); @@ -7585,8 +7556,9 @@ void mt_validate(struct maple_tree *mt) MAS_WARN_ON(&mas, mte_dead_node(mas.node)); end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && - (mas.max != ULONG_MAX))) { - pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); + (!mte_is_root(mas.node)))) { + pr_err("Invalid size %u of " PTR_FMT "\n", + end, mas_mn(&mas)); } mas_validate_parent_slot(&mas); @@ -7597,15 +7569,13 @@ void mt_validate(struct maple_tree *mt) mas_dfs_postorder(&mas, ULONG_MAX); } mt_validate_nulls(mt); -done: - rcu_read_unlock(); - } EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { - pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); + pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ", + mas->tree, mas->node); switch (mas->status) { case ma_active: pr_err("(ma_active)"); @@ -7633,9 +7603,43 @@ void mas_dump(const struct ma_state *mas) break; } + pr_err("Store Type: "); + switch (mas->store_type) { + case wr_invalid: + pr_err("invalid store type\n"); + break; + case wr_new_root: + pr_err("new_root\n"); + break; + case wr_store_root: + pr_err("store_root\n"); + break; + case wr_exact_fit: + pr_err("exact_fit\n"); + break; + case wr_split_store: + pr_err("split_store\n"); + break; + case wr_slot_store: + pr_err("slot_store\n"); + break; + case wr_append: + pr_err("append\n"); + break; + case wr_node_store: + pr_err("node_store\n"); + break; + case wr_spanning_store: + pr_err("spanning_store\n"); + break; + case wr_rebalance: + pr_err("rebalance\n"); + break; + } + pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); - pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", + pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); @@ -7644,7 +7648,7 @@ EXPORT_SYMBOL_GPL(mas_dump); void mas_wr_dump(const struct ma_wr_state *wr_mas) { - pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", + pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n", wr_mas->node, wr_mas->r_min, wr_mas->r_max); pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, diff --git a/lib/math/Makefile b/lib/math/Makefile index 91fcdb0c9efe..853f023ae537 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -5,5 +5,8 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o +obj-$(CONFIG_INT_POW_TEST) += tests/int_pow_kunit.o obj-$(CONFIG_TEST_DIV64) += test_div64.o +obj-$(CONFIG_TEST_MULDIV64) += test_mul_u64_u64_div_u64.o obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o +obj-$(CONFIG_INT_SQRT_KUNIT_TEST) += tests/int_sqrt_kunit.o
\ No newline at end of file diff --git a/lib/math/div64.c b/lib/math/div64.c index 191761b1b623..5faa29208bdb 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -186,55 +186,84 @@ EXPORT_SYMBOL(iter_div_u64_rem); #ifndef mul_u64_u64_div_u64 u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) { - u64 res = 0, div, rem; - int shift; + if (ilog2(a) + ilog2(b) <= 62) + return div64_u64(a * b, c); - /* can a * b overflow ? */ - if (ilog2(a) + ilog2(b) > 62) { - /* - * Note that the algorithm after the if block below might lose - * some precision and the result is more exact for b > a. So - * exchange a and b if a is bigger than b. - * - * For example with a = 43980465100800, b = 100000000, c = 1000000000 - * the below calculation doesn't modify b at all because div == 0 - * and then shift becomes 45 + 26 - 62 = 9 and so the result - * becomes 4398035251080. However with a and b swapped the exact - * result is calculated (i.e. 4398046510080). - */ - if (a > b) - swap(a, b); +#if defined(__SIZEOF_INT128__) + + /* native 64x64=128 bits multiplication */ + u128 prod = (u128)a * b; + u64 n_lo = prod, n_hi = prod >> 64; + +#else + + /* perform a 64x64=128 bits multiplication manually */ + u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; + u64 x, y, z; + + x = (u64)a_lo * b_lo; + y = (u64)a_lo * b_hi + (u32)(x >> 32); + z = (u64)a_hi * b_hi + (u32)(y >> 32); + y = (u64)a_hi * b_lo + (u32)y; + z += (u32)(y >> 32); + x = (y << 32) + (u32)x; + + u64 n_lo = x, n_hi = z; + +#endif + + /* make sure c is not zero, trigger exception otherwise */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdiv-by-zero" + if (unlikely(c == 0)) + return 1/0; +#pragma GCC diagnostic pop + + int shift = __builtin_ctzll(c); + /* try reducing the fraction in case the dividend becomes <= 64 bits */ + if ((n_hi >> shift) == 0) { + u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo; + + return div64_u64(n, c >> shift); /* - * (b * a) / c is equal to - * - * (b / c) * a + - * (b % c) * a / c - * - * if nothing overflows. Can the 1st multiplication - * overflow? Yes, but we do not care: this can only - * happen if the end result can't fit in u64 anyway. - * - * So the code below does - * - * res = (b / c) * a; - * b = b % c; + * The remainder value if needed would be: + * res = div64_u64_rem(n, c >> shift, &rem); + * rem = (rem << shift) + (n_lo - (n << shift)); */ - div = div64_u64_rem(b, c, &rem); - res = div * a; - b = rem; - - shift = ilog2(a) + ilog2(b) - 62; - if (shift > 0) { - /* drop precision */ - b >>= shift; - c >>= shift; - if (!c) - return res; - } } - return res + div64_u64(a * b, c); + if (n_hi >= c) { + /* overflow: result is unrepresentable in a u64 */ + return -1; + } + + /* Do the full 128 by 64 bits division */ + + shift = __builtin_clzll(c); + c <<= shift; + + int p = 64 + shift; + u64 res = 0; + bool carry; + + do { + carry = n_hi >> 63; + shift = carry ? 
1 : __builtin_clzll(n_hi); + if (p < shift) + break; + p -= shift; + n_hi <<= shift; + n_hi |= n_lo >> (64 - shift); + n_lo <<= shift; + if (carry || (n_hi >= c)) { + n_hi -= c; + res |= 1ULL << p; + } + } while (n_hi); + /* The remainder value if needed would be n_hi << p */ + + return res; } EXPORT_SYMBOL(mul_u64_u64_div_u64); #endif diff --git a/lib/math/prime_numbers.c b/lib/math/prime_numbers.c index d3b64b10da1c..9a17ee9af93a 100644 --- a/lib/math/prime_numbers.c +++ b/lib/math/prime_numbers.c @@ -311,4 +311,5 @@ module_exit(primes_exit); module_param_named(selftest, selftest_max, ulong, 0400); MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Prime number library"); MODULE_LICENSE("GPL"); diff --git a/lib/math/rational-test.c b/lib/math/rational-test.c index 01611ddff420..47486a95f088 100644 --- a/lib/math/rational-test.c +++ b/lib/math/rational-test.c @@ -53,4 +53,5 @@ static struct kunit_suite rational_test_suite = { kunit_test_suites(&rational_test_suite); +MODULE_DESCRIPTION("Rational fractions unit test"); MODULE_LICENSE("GPL v2"); diff --git a/lib/math/rational.c b/lib/math/rational.c index ec59d426ea63..d2c34e629ee1 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -108,4 +108,5 @@ void rational_best_approximation( EXPORT_SYMBOL(rational_best_approximation); +MODULE_DESCRIPTION("Rational fraction support library"); MODULE_LICENSE("GPL v2"); diff --git a/lib/math/test_div64.c b/lib/math/test_div64.c index c15edd688dd2..3cd699b654d9 100644 --- a/lib/math/test_div64.c +++ b/lib/math/test_div64.c @@ -26,6 +26,9 @@ static const u64 test_div64_dividends[] = { 0x0072db27380dd689, 0x0842f488162e2284, 0xf66745411d8ab063, + 0xfffffffffffffffb, + 0xfffffffffffffffc, + 0xffffffffffffffff, }; #define SIZE_DIV64_DIVIDENDS ARRAY_SIZE(test_div64_dividends) @@ -37,7 +40,10 @@ static const u64 test_div64_dividends[] = { #define TEST_DIV64_DIVISOR_5 0x0008a880 #define TEST_DIV64_DIVISOR_6 0x003fd3ae #define TEST_DIV64_DIVISOR_7 0x0b658fac -#define TEST_DIV64_DIVISOR_8 0xdc08b349 +#define TEST_DIV64_DIVISOR_8 0x80000001 +#define TEST_DIV64_DIVISOR_9 0xdc08b349 +#define TEST_DIV64_DIVISOR_A 0xfffffffe +#define TEST_DIV64_DIVISOR_B 0xffffffff static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_0, @@ -49,13 +55,16 @@ static const u32 test_div64_divisors[] = { TEST_DIV64_DIVISOR_6, TEST_DIV64_DIVISOR_7, TEST_DIV64_DIVISOR_8, + TEST_DIV64_DIVISOR_9, + TEST_DIV64_DIVISOR_A, + TEST_DIV64_DIVISOR_B, }; #define SIZE_DIV64_DIVISORS ARRAY_SIZE(test_div64_divisors) static const struct { u64 quotient; u32 remainder; -} test_div64_results[SIZE_DIV64_DIVISORS][SIZE_DIV64_DIVIDENDS] = { +} test_div64_results[SIZE_DIV64_DIVIDENDS][SIZE_DIV64_DIVISORS] = { { { 0x0000000013045e47, 0x00000001 }, { 0x000000000161596c, 0x00000030 }, @@ -65,6 +74,9 @@ static const struct { { 0x00000000000013c4, 0x0004ce80 }, { 0x00000000000002ae, 0x001e143c }, { 0x000000000000000f, 0x0033e56c }, + { 0x0000000000000001, 0x2b27507f }, + { 0x0000000000000000, 0xab275080 }, + { 0x0000000000000000, 0xab275080 }, { 0x0000000000000000, 0xab275080 }, }, { { 0x00000001c45c02d1, 0x00000000 }, @@ -75,7 +87,10 @@ static const struct { { 0x000000000001d637, 0x0004e5d9 }, { 0x0000000000003fc9, 0x000713bb }, { 0x0000000000000165, 0x029abe7d }, + { 0x000000000000001f, 0x673c193a }, { 0x0000000000000012, 0x6e9f7e37 }, + { 0x000000000000000f, 0xe73c1977 }, + { 0x000000000000000f, 0xe73c1968 }, }, { { 0x000000197a3a0cf7, 0x00000002 }, { 0x00000001d9632e5c, 0x00000021 }, @@ -85,7 +100,10 @@ static const struct { { 
0x00000000001a7bb3, 0x00072331 }, { 0x00000000000397ad, 0x0002c61b }, { 0x000000000000141e, 0x06ea2e89 }, + { 0x00000000000001ca, 0x4c0a72e7 }, { 0x000000000000010a, 0xab002ad7 }, + { 0x00000000000000e5, 0x4c0a767b }, + { 0x00000000000000e5, 0x4c0a7596 }, }, { { 0x0000017949e37538, 0x00000001 }, { 0x0000001b62441f37, 0x00000055 }, @@ -95,7 +113,10 @@ static const struct { { 0x0000000001882ec6, 0x0005cbf9 }, { 0x000000000035333b, 0x0017abdf }, { 0x00000000000129f1, 0x0ab4520d }, + { 0x0000000000001a87, 0x18ff0472 }, { 0x0000000000000f6e, 0x8ac0ce9b }, + { 0x0000000000000d43, 0x98ff397f }, + { 0x0000000000000d43, 0x98ff2c3c }, }, { { 0x000011f321a74e49, 0x00000006 }, { 0x0000014d8481d211, 0x0000005b }, @@ -105,7 +126,10 @@ static const struct { { 0x0000000012a88828, 0x00036c97 }, { 0x000000000287f16f, 0x002c2a25 }, { 0x00000000000e2cc7, 0x02d581e3 }, + { 0x0000000000014318, 0x2ee07d7f }, { 0x000000000000bbf4, 0x1ba08c03 }, + { 0x000000000000a18c, 0x2ee303af }, + { 0x000000000000a18c, 0x2ee26223 }, }, { { 0x0000d8db8f72935d, 0x00000005 }, { 0x00000fbd5aed7a2e, 0x00000002 }, @@ -115,7 +139,10 @@ static const struct { { 0x00000000e16b20fa, 0x0002a14a }, { 0x000000001e940d22, 0x00353b2e }, { 0x0000000000ab40ac, 0x06fba6ba }, + { 0x00000000000f3f70, 0x0af7eeda }, { 0x000000000008debd, 0x72d98365 }, + { 0x0000000000079fb8, 0x0b166dba }, + { 0x0000000000079fb8, 0x0b0ece02 }, }, { { 0x000cc3045b8fc281, 0x00000000 }, { 0x0000ed1f48b5c9fc, 0x00000079 }, @@ -125,7 +152,10 @@ static const struct { { 0x0000000d43fce827, 0x00082b09 }, { 0x00000001ccaba11a, 0x0037e8dd }, { 0x000000000a13f729, 0x0566dffd }, + { 0x0000000000e5b64e, 0x3728203b }, { 0x000000000085a14b, 0x23d36726 }, + { 0x000000000072db27, 0x38f38cd7 }, + { 0x000000000072db27, 0x3880b1b0 }, }, { { 0x00eafeb9c993592b, 0x00000001 }, { 0x00110e5befa9a991, 0x00000048 }, @@ -135,7 +165,10 @@ static const struct { { 0x000000f4459740fc, 0x00084484 }, { 0x0000002122c47bf9, 0x002ca446 }, { 0x00000000b9936290, 0x004979c4 }, + { 0x000000001085e910, 0x05a83974 }, { 0x00000000099ca89d, 0x9db446bf }, + { 0x000000000842f488, 0x26b40b94 }, + { 0x000000000842f488, 0x1e71170c }, }, { { 0x1b60cece589da1d2, 0x00000001 }, { 0x01fcb42be1453f5b, 0x0000004f }, @@ -145,7 +178,49 @@ static const struct { { 0x00001c757dfab350, 0x00048863 }, { 0x000003dc4979c652, 0x00224ea7 }, { 0x000000159edc3144, 0x06409ab3 }, + { 0x00000001ecce8a7e, 0x30bc25e5 }, { 0x000000011eadfee3, 0xa99c48a8 }, + { 0x00000000f6674543, 0x0a593ae9 }, + { 0x00000000f6674542, 0x13f1f5a5 }, + }, { + { 0x1c71c71c71c71c71, 0x00000002 }, + { 0x0210842108421084, 0x0000000b }, + { 0x007f01fc07f01fc0, 0x000000fb }, + { 0x00014245eabf1f9a, 0x0000a63d }, + { 0x0000ffffffffffff, 0x0000fffb }, + { 0x00001d913cecc509, 0x0007937b }, + { 0x00000402c70c678f, 0x0005bfc9 }, + { 0x00000016766cb70b, 0x045edf97 }, + { 0x00000001fffffffb, 0x80000000 }, + { 0x0000000129d84b3a, 0xa2e8fe71 }, + { 0x0000000100000001, 0xfffffffd }, + { 0x0000000100000000, 0xfffffffb }, + }, { + { 0x1c71c71c71c71c71, 0x00000003 }, + { 0x0210842108421084, 0x0000000c }, + { 0x007f01fc07f01fc0, 0x000000fc }, + { 0x00014245eabf1f9a, 0x0000a63e }, + { 0x0000ffffffffffff, 0x0000fffc }, + { 0x00001d913cecc509, 0x0007937c }, + { 0x00000402c70c678f, 0x0005bfca }, + { 0x00000016766cb70b, 0x045edf98 }, + { 0x00000001fffffffc, 0x00000000 }, + { 0x0000000129d84b3a, 0xa2e8fe72 }, + { 0x0000000100000002, 0x00000000 }, + { 0x0000000100000000, 0xfffffffc }, + }, { + { 0x1c71c71c71c71c71, 0x00000006 }, + { 0x0210842108421084, 0x0000000f }, + { 
0x007f01fc07f01fc0, 0x000000ff }, + { 0x00014245eabf1f9a, 0x0000a641 }, + { 0x0000ffffffffffff, 0x0000ffff }, + { 0x00001d913cecc509, 0x0007937f }, + { 0x00000402c70c678f, 0x0005bfcd }, + { 0x00000016766cb70b, 0x045edf9b }, + { 0x00000001fffffffc, 0x00000003 }, + { 0x0000000129d84b3a, 0xa2e8fe75 }, + { 0x0000000100000002, 0x00000003 }, + { 0x0000000100000001, 0x00000000 }, }, }; @@ -208,6 +283,12 @@ static bool __init test_div64(void) return false; if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_8, i, 8)) return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_9, i, 9)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_A, i, 10)) + return false; + if (!test_div64_one(dividend, TEST_DIV64_DIVISOR_B, i, 11)) + return false; for (j = 0; j < SIZE_DIV64_DIVISORS; j++) { if (!test_div64_one(dividend, test_div64_divisors[j], i, j)) diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c new file mode 100644 index 000000000000..58d058de4e73 --- /dev/null +++ b/lib/math/test_mul_u64_u64_div_u64.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 BayLibre SAS + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/math64.h> + +typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params; + +static test_params test_values[] = { +/* this contains many edge values followed by a couple random values */ +{ 0xb, 0x7, 0x3, 0x19 }, +{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000 }, +{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001 }, +{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000 }, +{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000 }, +{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab }, +{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000 }, +{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff }, +{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 }, +{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554 }, +{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3 }, +{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2 }, +{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 }, +{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 }, +{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 }, +{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b }, +{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 }, +{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe }, +{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 }, +{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 }, +{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f }, +{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 }, +{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 }, +}; + +/* 
+ * The above table can be verified with the following shell script: + * + * #!/bin/sh + * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \ + * lib/math/test_mul_u64_u64_div_u64.c | + * while read a b c r; do + * expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc ) + * given=$( printf "%X\n" $r ) + * if [ "$expected" = "$given" ]; then + * echo "$a * $b / $c = $r OK" + * else + * echo "$a * $b / $c = $r is wrong" >&2 + * echo "should be equivalent to 0x$expected" >&2 + * exit 1 + * fi + * done + */ + +static int __init test_init(void) +{ + int i; + + pr_info("Starting mul_u64_u64_div_u64() test\n"); + + for (i = 0; i < ARRAY_SIZE(test_values); i++) { + u64 a = test_values[i].a; + u64 b = test_values[i].b; + u64 c = test_values[i].c; + u64 expected_result = test_values[i].result; + u64 result = mul_u64_u64_div_u64(a, b, c); + + if (result != expected_result) { + pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c); + pr_err("ERROR: expected result: %016llx\n", expected_result); + pr_err("ERROR: obtained result: %016llx\n", result); + } + } + + pr_info("Completed mul_u64_u64_div_u64() test\n"); + return 0; +} + +static void __exit test_exit(void) +{ +} + +module_init(test_init); +module_exit(test_exit); + +MODULE_AUTHOR("Nicolas Pitre"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("mul_u64_u64_div_u64() test module"); diff --git a/lib/math/tests/Makefile b/lib/math/tests/Makefile new file mode 100644 index 000000000000..e1a79f093b2d --- /dev/null +++ b/lib/math/tests/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_INT_POW_TEST) += int_pow_kunit.o +obj-$(CONFIG_INT_SQRT_KUNIT_TEST) += int_sqrt_kunit.o diff --git a/lib/math/tests/int_pow_kunit.c b/lib/math/tests/int_pow_kunit.c new file mode 100644 index 000000000000..34b33677d458 --- /dev/null +++ b/lib/math/tests/int_pow_kunit.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <kunit/test.h> +#include <linux/math.h> + +struct test_case_params { + u64 base; + unsigned int exponent; + u64 expected_result; + const char *name; +}; + +static const struct test_case_params params[] = { + { 64, 0, 1, "Power of zero" }, + { 64, 1, 64, "Power of one"}, + { 0, 5, 0, "Base zero" }, + { 1, 64, 1, "Base one" }, + { 2, 2, 4, "Two squared"}, + { 2, 3, 8, "Two cubed"}, + { 5, 5, 3125, "Five raised to the fifth power" }, + { U64_MAX, 1, U64_MAX, "Max base" }, + { 2, 63, 9223372036854775808ULL, "Large result"}, +}; + +static void get_desc(const struct test_case_params *tc, char *desc) +{ + strscpy(desc, tc->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(int_pow, params, get_desc); + +static void int_pow_test(struct kunit *test) +{ + const struct test_case_params *tc = (const struct test_case_params *)test->param_value; + + KUNIT_EXPECT_EQ(test, tc->expected_result, int_pow(tc->base, tc->exponent)); +} + +static struct kunit_case math_int_pow_test_cases[] = { + KUNIT_CASE_PARAM(int_pow_test, int_pow_gen_params), + {} +}; + +static struct kunit_suite int_pow_test_suite = { + .name = "math-int_pow", + .test_cases = math_int_pow_test_cases, +}; + +kunit_test_suites(&int_pow_test_suite); + +MODULE_DESCRIPTION("math.int_pow KUnit test suite"); +MODULE_LICENSE("GPL"); diff --git a/lib/math/tests/int_sqrt_kunit.c b/lib/math/tests/int_sqrt_kunit.c new file mode 100644 index 000000000000..1798e1312eb7 --- /dev/null +++ b/lib/math/tests/int_sqrt_kunit.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <kunit/test.h> +#include 
<linux/limits.h> +#include <linux/math.h> +#include <linux/module.h> +#include <linux/string.h> + +struct test_case_params { + unsigned long x; + unsigned long expected_result; + const char *name; +}; + +static const struct test_case_params params[] = { + { 0, 0, "edge case: square root of 0" }, + { 1, 1, "perfect square: square root of 1" }, + { 2, 1, "non-perfect square: square root of 2" }, + { 3, 1, "non-perfect square: square root of 3" }, + { 4, 2, "perfect square: square root of 4" }, + { 5, 2, "non-perfect square: square root of 5" }, + { 6, 2, "non-perfect square: square root of 6" }, + { 7, 2, "non-perfect square: square root of 7" }, + { 8, 2, "non-perfect square: square root of 8" }, + { 9, 3, "perfect square: square root of 9" }, + { 15, 3, "non-perfect square: square root of 15 (N-1 from 16)" }, + { 16, 4, "perfect square: square root of 16" }, + { 17, 4, "non-perfect square: square root of 17 (N+1 from 16)" }, + { 80, 8, "non-perfect square: square root of 80 (N-1 from 81)" }, + { 81, 9, "perfect square: square root of 81" }, + { 82, 9, "non-perfect square: square root of 82 (N+1 from 81)" }, + { 255, 15, "non-perfect square: square root of 255 (N-1 from 256)" }, + { 256, 16, "perfect square: square root of 256" }, + { 257, 16, "non-perfect square: square root of 257 (N+1 from 256)" }, + { 2147483648, 46340, "large input: square root of 2147483648" }, + { 4294967295, 65535, "edge case: ULONG_MAX for 32-bit" }, +}; + +static void get_desc(const struct test_case_params *tc, char *desc) +{ + strscpy(desc, tc->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(int_sqrt, params, get_desc); + +static void int_sqrt_test(struct kunit *test) +{ + const struct test_case_params *tc = (const struct test_case_params *)test->param_value; + + KUNIT_EXPECT_EQ(test, tc->expected_result, int_sqrt(tc->x)); +} + +static struct kunit_case math_int_sqrt_test_cases[] = { + KUNIT_CASE_PARAM(int_sqrt_test, int_sqrt_gen_params), + {} +}; + +static struct kunit_suite int_sqrt_test_suite = { + .name = "math-int_sqrt", + .test_cases = math_int_sqrt_test_cases, +}; + +kunit_test_suites(&int_sqrt_test_suite); + +MODULE_DESCRIPTION("math.int_sqrt KUnit test suite"); +MODULE_LICENSE("GPL"); diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 20ea9038c3ff..d36933554e46 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -510,4 +510,5 @@ static struct kunit_suite memcpy_test_suite = { kunit_test_suite(memcpy_test_suite); +MODULE_DESCRIPTION("test cases for memcpy(), memmove(), and memset()"); MODULE_LICENSE("GPL"); diff --git a/lib/min_heap.c b/lib/min_heap.c new file mode 100644 index 000000000000..4485372ff3b1 --- /dev/null +++ b/lib/min_heap.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/min_heap.h> + +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + __min_heap_init_inline(heap, data, size); +} +EXPORT_SYMBOL(__min_heap_init); + +void *__min_heap_peek(struct min_heap_char *heap) +{ + return __min_heap_peek_inline(heap); +} +EXPORT_SYMBOL(__min_heap_peek); + +bool __min_heap_full(min_heap_char *heap) +{ + return __min_heap_full_inline(heap); +} +EXPORT_SYMBOL(__min_heap_full); + +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down); + +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct 
min_heap_callbacks *func, void *args) +{ + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_up); + +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heapify_all_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heapify_all); + +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_pop_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop); + +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_pop_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop_push); + +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_push); + +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_del_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_del); diff --git a/lib/objagg.c b/lib/objagg.c index 1e248629ed64..363e43e849ac 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -167,6 +167,9 @@ static int objagg_obj_parent_assign(struct objagg *objagg, { void *delta_priv; + if (WARN_ON(!objagg_obj_is_root(parent))) + return -EINVAL; + delta_priv = objagg->ops->delta_create(objagg->priv, parent->obj, objagg_obj->obj); if (IS_ERR(delta_priv)) @@ -421,7 +424,7 @@ static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj) * * There are 3 main options this function wraps: * 1) The object according to "obj" already exist. In that case - * the reference counter is incrementes and the object is returned. + * the reference counter is incremented and the object is returned. * 2) The object does not exist, but it can be aggregated within * another object. In that case, user ops->delta_create() is called * to obtain delta data and a new object is created with returned @@ -903,20 +906,6 @@ static const struct objagg_opt_algo *objagg_opt_algos[] = { [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, }; -static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - struct rhashtable *ht = arg->ht; - struct objagg_hints *objagg_hints = - container_of(ht, struct objagg_hints, node_ht); - const struct objagg_ops *ops = objagg_hints->ops; - const char *ptr = obj; - - ptr += ht->p.key_offset; - return ops->hints_obj_cmp ? 
ops->hints_obj_cmp(ptr, arg->key) : - memcmp(ptr, arg->key, ht->p.key_len); -} - /** * objagg_hints_get - obtains hints instance * @objagg: objagg instance @@ -955,7 +944,6 @@ struct objagg_hints *objagg_hints_get(struct objagg *objagg, offsetof(struct objagg_hints_node, obj); objagg_hints->ht_params.head_offset = offsetof(struct objagg_hints_node, ht_node); - objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); if (err) diff --git a/lib/objpool.c b/lib/objpool.c index cfdc02420884..b998b720c732 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -50,7 +50,7 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, { int i, cpu_count = 0; - for (i = 0; i < pool->nr_cpus; i++) { + for (i = 0; i < nr_cpu_ids; i++) { struct objpool_slot *slot; int nodes, size, rc; @@ -60,8 +60,8 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, continue; /* compute how many objects to be allocated with this slot */ - nodes = nr_objs / num_possible_cpus(); - if (cpu_count < (nr_objs % num_possible_cpus())) + nodes = nr_objs / pool->nr_possible_cpus; + if (cpu_count < (nr_objs % pool->nr_possible_cpus)) nodes++; cpu_count++; @@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, * warm caches and TLB hits. in default vmalloc is used to * reduce the pressure of kernel slab system. as we know, * mimimal size of vmalloc is one page since vmalloc would - * always align the requested size to page size + * always align the requested size to page size. + * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC) + * allocate percpu slot with kmalloc. */ - if (pool->gfp & GFP_ATOMIC) - slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); - else + slot = NULL; + + if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC) slot = __vmalloc_node(size, sizeof(void *), pool->gfp, cpu_to_node(i), __builtin_return_address(0)); - if (!slot) - return -ENOMEM; + + if (!slot) { + slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); + if (!slot) + return -ENOMEM; + } memset(slot, 0, size); pool->cpu_slots[i] = slot; @@ -103,7 +109,7 @@ static void objpool_fini_percpu_slots(struct objpool_head *pool) if (!pool->cpu_slots) return; - for (i = 0; i < pool->nr_cpus; i++) + for (i = 0; i < nr_cpu_ids; i++) kvfree(pool->cpu_slots[i]); kfree(pool->cpu_slots); } @@ -130,13 +136,13 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, /* initialize objpool pool */ memset(pool, 0, sizeof(struct objpool_head)); - pool->nr_cpus = nr_cpu_ids; + pool->nr_possible_cpus = num_possible_cpus(); pool->obj_size = object_size; pool->capacity = capacity; pool->gfp = gfp & ~__GFP_ZERO; pool->context = context; pool->release = release; - slot_size = pool->nr_cpus * sizeof(struct objpool_slot); + slot_size = nr_cpu_ids * sizeof(struct objpool_slot); pool->cpu_slots = kzalloc(slot_size, pool->gfp); if (!pool->cpu_slots) return -ENOMEM; @@ -152,106 +158,6 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, } EXPORT_SYMBOL_GPL(objpool_init); -/* adding object to slot, abort if the slot was already full */ -static inline int -objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu) -{ - struct objpool_slot *slot = pool->cpu_slots[cpu]; - uint32_t head, tail; - - /* loading tail and head as a local snapshot, tail first */ - tail = READ_ONCE(slot->tail); - - do { - head = READ_ONCE(slot->head); - /* fault caught: something must be wrong */ - WARN_ON_ONCE(tail - 
head > pool->nr_objs); - } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1)); - - /* now the tail position is reserved for the given obj */ - WRITE_ONCE(slot->entries[tail & slot->mask], obj); - /* update sequence to make this obj available for pop() */ - smp_store_release(&slot->last, tail + 1); - - return 0; -} - -/* reclaim an object to object pool */ -int objpool_push(void *obj, struct objpool_head *pool) -{ - unsigned long flags; - int rc; - - /* disable local irq to avoid preemption & interruption */ - raw_local_irq_save(flags); - rc = objpool_try_add_slot(obj, pool, raw_smp_processor_id()); - raw_local_irq_restore(flags); - - return rc; -} -EXPORT_SYMBOL_GPL(objpool_push); - -/* try to retrieve object from slot */ -static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu) -{ - struct objpool_slot *slot = pool->cpu_slots[cpu]; - /* load head snapshot, other cpus may change it */ - uint32_t head = smp_load_acquire(&slot->head); - - while (head != READ_ONCE(slot->last)) { - void *obj; - - /* - * data visibility of 'last' and 'head' could be out of - * order since memory updating of 'last' and 'head' are - * performed in push() and pop() independently - * - * before any retrieving attempts, pop() must guarantee - * 'last' is behind 'head', that is to say, there must - * be available objects in slot, which could be ensured - * by condition 'last != head && last - head <= nr_objs' - * that is equivalent to 'last - head - 1 < nr_objs' as - * 'last' and 'head' are both unsigned int32 - */ - if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) { - head = READ_ONCE(slot->head); - continue; - } - - /* obj must be retrieved before moving forward head */ - obj = READ_ONCE(slot->entries[head & slot->mask]); - - /* move head forward to mark it's consumption */ - if (try_cmpxchg_release(&slot->head, &head, head + 1)) - return obj; - } - - return NULL; -} - -/* allocate an object from object pool */ -void *objpool_pop(struct objpool_head *pool) -{ - void *obj = NULL; - unsigned long flags; - int i, cpu; - - /* disable local irq to avoid preemption & interruption */ - raw_local_irq_save(flags); - - cpu = raw_smp_processor_id(); - for (i = 0; i < num_possible_cpus(); i++) { - obj = objpool_try_get_slot(pool, cpu); - if (obj) - break; - cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); - } - raw_local_irq_restore(flags); - - return obj; -} -EXPORT_SYMBOL_GPL(objpool_pop); - /* release whole objpool forcely */ void objpool_free(struct objpool_head *pool) { diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 4ef31b0bb74d..5222c6393f11 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. 
*/ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); @@ -1178,14 +1177,28 @@ struct foo { s16 array[] __counted_by(counter); }; +struct bar { + int a; + u32 counter; + s16 array[]; +}; + static void DEFINE_FLEX_test(struct kunit *test) { - DEFINE_RAW_FLEX(struct foo, two, array, 2); + /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */ + DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2); +#ifdef CONFIG_CC_HAS_COUNTED_BY + int expected_raw_size = sizeof(struct foo); +#else + int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); +#endif + /* Without annotation, it will always be on-stack size. */ + DEFINE_RAW_FLEX(struct bar, two, array, 2); DEFINE_FLEX(struct foo, eight, array, counter, 8); DEFINE_FLEX(struct foo, empty, array, counter, 0); - KUNIT_EXPECT_EQ(test, __struct_size(two), - sizeof(struct foo) + sizeof(s16) + sizeof(s16)); + KUNIT_EXPECT_EQ(test, __struct_size(two_but_zero), expected_raw_size); + KUNIT_EXPECT_EQ(test, __struct_size(two), sizeof(struct bar) + 2 * sizeof(s16)); KUNIT_EXPECT_EQ(test, __struct_size(eight), 24); KUNIT_EXPECT_EQ(test, __struct_size(empty), sizeof(struct foo)); } @@ -1223,4 +1236,5 @@ static struct kunit_suite overflow_test_suite = { kunit_test_suite(overflow_test_suite); +MODULE_DESCRIPTION("Test cases for arithmetic overflow checks"); MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/packing.c b/lib/packing.c index 3f656167c17e..bb1643d9e64d 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -5,56 +5,163 @@ #include <linux/packing.h> #include <linux/module.h> #include <linux/bitops.h> +#include <linux/bits.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/bitrev.h> -static int get_le_offset(int offset) -{ - int closest_multiple_of_4; +#define __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &(fields)[i]; \ + u64 uval; \ + \ + uval = ustruct_field_to_u64(ustruct, field->offset, field->size); \ + \ + __pack(pbuf, uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + } \ + }) - closest_multiple_of_4 = (offset / 4) * 4; - offset -= closest_multiple_of_4; - return closest_multiple_of_4 + (3 - offset); -} +#define __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &fields[i]; \ + u64 uval; \ + \ + __unpack(pbuf, &uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + \ + u64_to_ustruct_field(ustruct, field->offset, field->size, uval); \ + } \ + }) -static int get_reverse_lsw32_offset(int offset, size_t len) +/** + * calculate_box_addr - Determine physical location of byte in buffer + * @box: Index of byte within buffer seen as a logical big-endian big number + * @len: Size of buffer in bytes + * @quirks: mask of QUIRK_LSW32_IS_FIRST and QUIRK_LITTLE_ENDIAN + * + * Function interprets the buffer as a @len byte sized big number, and returns + * the physical offset of the @box logical octet within it. Internally, it + * treats the big number as groups of 4 bytes. If @len is not a multiple of 4, + * the last group may be shorter. + * + * @QUIRK_LSW32_IS_FIRST gives the ordering of groups of 4 octets relative to + * each other. If set, the most significant group of 4 octets is last in the + * buffer (and may be truncated if @len is not a multiple of 4). 
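+ * + * For example, with a 16 byte buffer and neither quirk set, logical octet 0 (the least significant) is stored at physical offset 15, while with @QUIRK_LSW32_IS_FIRST set it is stored at physical offset 3.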
+ * + * @QUIRK_LITTLE_ENDIAN gives the ordering of bytes within each group of 4. + * If set, the most significant byte is last in the group. If @len takes the + * form of 4k+3, the last group will only be able to represent 24 bits, and its + * most significant octet is byte 2. + * + * Return: the physical offset into the buffer corresponding to the logical box. + */ +static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) { - int closest_multiple_of_4; - int word_index; - - word_index = offset / 4; - closest_multiple_of_4 = word_index * 4; - offset -= closest_multiple_of_4; - word_index = (len / 4) - word_index - 1; - return word_index * 4 + offset; + size_t offset_of_group, offset_in_group, this_group = box / 4; + size_t group_size; + + if (quirks & QUIRK_LSW32_IS_FIRST) + offset_of_group = this_group * 4; + else + offset_of_group = len - ((this_group + 1) * 4); + + group_size = min(4, len - offset_of_group); + + if (quirks & QUIRK_LITTLE_ENDIAN) + offset_in_group = box - this_group * 4; + else + offset_in_group = group_size - (box - this_group * 4) - 1; + + return offset_of_group + offset_in_group; } -static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, - int *box_end_bit, u8 *box_mask) +static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) { - int box_bit_width = *box_start_bit - *box_end_bit + 1; - int new_box_start_bit, new_box_end_bit; - - *to_write >>= *box_end_bit; - *to_write = bitrev8(*to_write) >> (8 - box_bit_width); - *to_write <<= *box_end_bit; - - new_box_end_bit = box_bit_width - *box_start_bit - 1; - new_box_start_bit = box_bit_width - *box_end_bit - 1; - *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); - *box_start_bit = new_box_start_bit; - *box_end_bit = new_box_end_bit; + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int value_width = startbit - endbit + 1; + int box; + + /* Check if "uval" fits in "value_width" bits. + * The test only works for value_width < 64, but in the latter case, + * any 64-bit uval will surely fit. + */ + WARN(value_width < 64 && uval >= (1ull << value_width), + "Cannot store 0x%llx inside bits %zu-%zu - will truncate\n", + uval, startbit, endbit); + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % BITS_PER_BYTE; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % BITS_PER_BYTE; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. 
+ */ + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Write to pbuf, read from uval */ + pval = uval & proj_mask; + pval >>= proj_end_bit; + pval <<= box_end_bit; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) { + pval = bitrev8(pval); + box_mask = bitrev8(box_mask); + } + + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } } /** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. + * pack - Pack u64 number into bitfield of buffer. * * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. + * @uval: CPU-readable unpacked value to pack. * @startbit: The index (in logical notation, compensated for quirks) where * the packed value starts within pbuf. Must be larger than, or * equal to, endbit. @@ -62,79 +169,68 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. */ -int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, - enum packing_op op, u8 quirks) +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - /* Number of bits for storing "uval" - * also width of the field to access in the pbuf + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. */ - u64 value_width; - /* Logical byte indices corresponding to the - * start and end of the field. - */ - int plogical_first_u8, plogical_last_u8, box; - - /* startbit is expected to be larger than endbit */ - if (startbit < endbit) + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) /* Invalid function call */ return -EINVAL; - value_width = startbit - endbit + 1; - if (value_width > 64) + if (unlikely(startbit - endbit >= 64)) return -ERANGE; - /* Check if "uval" fits in "value_width" bits. - * If value_width is 64, the check will fail, but any - * 64-bit uval will surely fit. 
+ __pack(pbuf, uval, startbit, endbit, pbuflen, quirks); + + return 0; +} +EXPORT_SYMBOL(pack); + +static void __unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* Logical byte indices corresponding to the + * start and end of the field. */ - if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) - /* Cannot store "uval" inside "value_width" bits. - * Truncating "uval" is most certainly not desirable, - * so simply erroring out is appropriate. - */ - return -ERANGE; + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int box; /* Initialize parameter */ - if (op == UNPACK) - *uval = 0; + *uval = 0; /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. */ - plogical_first_u8 = startbit / 8; - plogical_last_u8 = endbit / 8; - for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ - int box_start_bit, box_end_bit, box_addr; + size_t box_start_bit, box_end_bit, box_addr; u8 box_mask; /* Corresponding bits from the unpacked u64 parameter */ - int proj_start_bit, proj_end_bit; + size_t proj_start_bit, proj_end_bit; u64 proj_mask; + u64 pval; /* This u8 may need to be accessed in its entirety * (from bit 7 to bit 0), or not, depending on the * input arguments startbit and endbit. */ if (box == plogical_first_u8) - box_start_bit = startbit % 8; + box_start_bit = startbit % BITS_PER_BYTE; else box_start_bit = 7; if (box == plogical_last_u8) - box_end_bit = endbit % 8; + box_end_bit = endbit % BITS_PER_BYTE; else box_end_bit = 0; @@ -145,57 +241,238 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, * box is u8, the projection is u64 because it may fall * anywhere within the unpacked u64. */ - proj_start_bit = ((box * 8) + box_start_bit) - endbit; - proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); - box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); /* Determine the offset of the u8 box inside the pbuf, * adjusted for quirks. The adjusted box_addr will be used for * effective addressing inside the pbuf (so it's not * logical any longer). 
*/ - box_addr = pbuflen - box - 1; - if (quirks & QUIRK_LITTLE_ENDIAN) - box_addr = get_le_offset(box_addr); - if (quirks & QUIRK_LSW32_IS_FIRST) - box_addr = get_reverse_lsw32_offset(box_addr, - pbuflen); - - if (op == UNPACK) { - u64 pval; - - /* Read from pbuf, write to uval */ - pval = ((u8 *)pbuf)[box_addr] & box_mask; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval >>= box_end_bit; - pval <<= proj_end_bit; - *uval &= ~proj_mask; - *uval |= pval; - } else { - u64 pval; - - /* Write to pbuf, read from uval */ - pval = (*uval) & proj_mask; - pval >>= proj_end_bit; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval <<= box_end_bit; - ((u8 *)pbuf)[box_addr] &= ~box_mask; - ((u8 *)pbuf)[box_addr] |= pval; - } + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr]; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + pval = bitrev8(pval); + + pval &= box_mask; + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; } +} + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + __unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); + return 0; } +EXPORT_SYMBOL(unpack); + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. 
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + if (op == PACK) + return pack(pbuf, *uval, startbit, endbit, pbuflen, quirks); + + return unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); +} EXPORT_SYMBOL(packing); +static u64 ustruct_field_to_u64(const void *ustruct, size_t field_offset, + size_t field_size) +{ + switch (field_size) { + case 1: + return *((u8 *)(ustruct + field_offset)); + case 2: + return *((u16 *)(ustruct + field_offset)); + case 4: + return *((u32 *)(ustruct + field_offset)); + default: + return *((u64 *)(ustruct + field_offset)); + } +} + +static void u64_to_ustruct_field(void *ustruct, size_t field_offset, + size_t field_size, u64 uval) +{ + switch (field_size) { + case 1: + *((u8 *)(ustruct + field_offset)) = uval; + break; + case 2: + *((u16 *)(ustruct + field_offset)) = uval; + break; + case 4: + *((u32 *)(ustruct + field_offset)) = uval; + break; + default: + *((u64 *)(ustruct + field_offset)) = uval; + break; + } +} + +/** + * pack_fields_u8 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definition. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u8); + +/** + * pack_fields_u16 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. 
+ */ +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u16); + +/** + * unpack_fields_u8 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u8); + +/** + * unpack_fields_u16 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u16); + MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c new file mode 100644 index 000000000000..ce3b83d33b04 --- /dev/null +++ b/lib/packing_test.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Vladimir Oltean <olteanv@gmail.com> + * Copyright (c) 2024, Intel Corporation. + */ +#include <kunit/test.h> +#include <linux/packing.h> + +struct packing_test_case { + const char *desc; + const u8 *pbuf; + size_t pbuf_size; + u64 uval; + size_t start_bit; + size_t end_bit; + u8 quirks; +}; + +#define NO_QUIRKS 0 + +/** + * PBUF - Initialize .pbuf and .pbuf_size + * @array: elements of constant physical buffer + * + * Initializes the .pbuf and .pbuf_size fields of a struct packing_test_case + * with a constant array of the specified elements. + */ +#define PBUF(array...) \ + .pbuf = (const u8[]){ array }, \ + .pbuf_size = sizeof((const u8 []){ array }) + +static const struct packing_test_case cases[] = { + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
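+ * For instance, with no quirks the value's least significant octet (0xfe) lands at physical offset 11 and its most significant octet (0xca) at offset 4, exactly as the first case below expects.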
+ */ + { + .desc = "no quirks, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xbe, 0xef, 0xca, 0xfe, + 0xca, 0xfe, 0xde, 0xad, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xad, 0xde, 0xfe, 0xca, + 0xfe, 0xca, 0xef, 0xbe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x53, 0x7f, 0x7b, 0xb5, + 0x7d, 0xf7, 0x53, 0x7f, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7d, 0xf7, 0x53, 0x7f, + 0x53, 0x7f, 0x7b, 0xb5, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xb5, 0x7b, 0x7f, 0x53, + 0x7f, 0x53, 0xf7, 0x7d, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7f, 0x53, 0xf7, 0x7d, + 0xb5, 0x7b, 0x7f, 0x53, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of varying size from 18 bytes to 24 bytes. 
+ */ + { + .desc = "no quirks, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, + 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, + 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, + 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first + little endian words, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0x1122334455667788) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x89, 0x11, 0x9a, 0x22, 0xab, + 0x33, 0xbc, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x33, 0xbc, 0x40, 0x00, + 0x11, 0x9a, 0x22, 0xab, 0x00, 0x00, 0x00, 0x89), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned", + PBUF(0x89, 0x00, 0x00, 0x00, 0xab, 0x22, 0x9a, 0x11, + 0x00, 0x40, 0xbc, 0x33, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xbc, 0x33, + 0xab, 0x22, 0x9a, 0x11, 0x89, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x91, 0x88, 0x59, 0x44, 0xd5, + 0xcc, 0x3d, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0xcc, 0x3d, 0x02, 0x00, + 0x88, 0x59, 0x44, 0xd5, 0x00, 0x00, 0x00, 0x91), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned", + PBUF(0x91, 0x00, 0x00, 0x00, 0xd5, 0x44, 0x59, 0x88, + 0x00, 0x02, 0x3d, 0xcc, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x3d, 0xcc, + 0xd5, 0x44, 0x59, 0x88, 0x91, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a u64 with all bits set + * (0xffffffffffffffff) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. 
+ */ + { + .desc = "no quirks, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xf8, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x07, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1f, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xe0, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0xe0, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x1f, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(packing, cases, desc); + +static void packing_test_pack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u8 *pbuf; + int err; + + pbuf = kunit_kzalloc(test, params->pbuf_size, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, pbuf); + + err = pack(pbuf, params->uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + + KUNIT_EXPECT_EQ_MSG(test, err, 0, "pack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_MEMEQ(test, pbuf, params->pbuf, params->pbuf_size); +} + +static void packing_test_unpack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u64 uval; + int err; + + err = unpack(params->pbuf, &uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + KUNIT_EXPECT_EQ_MSG(test, err, 0, "unpack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_EQ(test, uval, params->uval); +} + +#define PACKED_BUF_SIZE 8 + +typedef struct __packed { u8 buf[PACKED_BUF_SIZE]; } packed_buf_t; + +struct test_data { + u32 field3; + u16 field2; + u16 field4; + u16 field6; + u8 field1; + u8 field5; +}; + +static const struct packed_field_u8 
test_fields[] = { + PACKED_FIELD(63, 61, struct test_data, field1), + PACKED_FIELD(60, 52, struct test_data, field2), + PACKED_FIELD(51, 28, struct test_data, field3), + PACKED_FIELD(27, 14, struct test_data, field4), + PACKED_FIELD(13, 9, struct test_data, field5), + PACKED_FIELD(8, 0, struct test_data, field6), +}; + +static void packing_test_pack_fields(struct kunit *test) +{ + const struct test_data data = { + .field1 = 0x2, + .field2 = 0x100, + .field3 = 0xF00050, + .field4 = 0x7D3, + .field5 = 0x9, + .field6 = 0x10B, + }; + packed_buf_t expect = { + .buf = { 0x50, 0x0F, 0x00, 0x05, 0x01, 0xF4, 0xD3, 0x0B }, + }; + packed_buf_t buf = {}; + + pack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_MEMEQ(test, &expect, &buf, sizeof(buf)); +} + +static void packing_test_unpack_fields(struct kunit *test) +{ + const packed_buf_t buf = { + .buf = { 0x17, 0x28, 0x10, 0x19, 0x3D, 0xA9, 0x07, 0x9C }, + }; + struct test_data data = {}; + + unpack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_EQ(test, 0, data.field1); + KUNIT_EXPECT_EQ(test, 0x172, data.field2); + KUNIT_EXPECT_EQ(test, 0x810193, data.field3); + KUNIT_EXPECT_EQ(test, 0x36A4, data.field4); + KUNIT_EXPECT_EQ(test, 0x3, data.field5); + KUNIT_EXPECT_EQ(test, 0x19C, data.field6); +} + +static struct kunit_case packing_test_cases[] = { + KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), + KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + KUNIT_CASE(packing_test_pack_fields), + KUNIT_CASE(packing_test_unpack_fields), + {}, +}; + +static struct kunit_suite packing_test_suite = { + .name = "packing", + .test_cases = packing_test_cases, +}; + +kunit_test_suite(packing_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for packing library"); diff --git a/lib/parser.c b/lib/parser.c index f4eafb9d74e6..73e8f8e5be73 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -275,8 +275,9 @@ EXPORT_SYMBOL(match_hex); * * Description: Parse the string @str to check if matches wildcard * pattern @pattern. The pattern may contain two types of wildcards: - * '*' - matches zero or more characters - * '?' - matches one character + * + * * '*' - matches zero or more characters + * * '?' - matches one character * * Return: If the @str matches the @pattern, return true, else return false. */ diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 44dd133594d4..2891f94a11c6 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -73,17 +73,50 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) EXPORT_SYMBOL(percpu_counter_set); /* - * local_irq_save() is needed to make the function irq safe: - * - The slow path would be ok as protected by an irq-safe spinlock. - * - this_cpu_add would be ok as it is irq-safe by definition. - * But: - * The decision slow path/fast path and the actual update must be atomic, too. + * Add to a counter while respecting batch size. + * + * There are 2 implementations, both dealing with the following problem: + * + * The decision slow path/fast path and the actual update must be atomic. * Otherwise a call in process context could check the current values and * decide that the fast path can be used. If now an interrupt occurs before * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), * then the this_cpu_add() that is executed after the interrupt has completed * can produce values larger than "batch" or even overflows. 
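To make the interrupt race described in this comment concrete, here is the unsafe ordering it rules out (illustrative sketch, not code from this patch):

	/* Process context decides the fast path is safe ...             */
	count = this_cpu_read(*fbc->counters);		/* e.g. batch - 1 */
	/* ... an interrupt fires here and adds to the same per-CPU
	 * counter ...
	 */
	this_cpu_add(*fbc->counters, amount);
	/* ... so the per-CPU value can now exceed "batch" or overflow,
	 * even though the check passed.  Both implementations below keep
	 * the check and the update atomic with respect to interrupts.
	 */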
*/ +#ifdef CONFIG_HAVE_CMPXCHG_LOCAL +/* + * Safety against interrupts is achieved in 2 ways: + * 1. the fast path uses local cmpxchg (note: no lock prefix) + * 2. the slow path operates with interrupts disabled + */ +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) +{ + s64 count; + unsigned long flags; + + count = this_cpu_read(*fbc->counters); + do { + if (unlikely(abs(count + amount) >= batch)) { + raw_spin_lock_irqsave(&fbc->lock, flags); + /* + * Note: by now we might have migrated to another CPU + * or the value might have changed. + */ + count = __this_cpu_read(*fbc->counters); + fbc->count += count + amount; + __this_cpu_sub(*fbc->counters, count); + raw_spin_unlock_irqrestore(&fbc->lock, flags); + return; + } + } while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount)); +} +#else +/* + * local_irq_save() is used to make the function irq safe: + * - The slow path would be ok as protected by an irq-safe spinlock. + * - this_cpu_add would be ok as it is irq-safe by definition. + */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; @@ -101,6 +134,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) } local_irq_restore(flags); } +#endif EXPORT_SYMBOL(percpu_counter_add_batch); /* @@ -175,7 +209,7 @@ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; - fbc[i].counters = (void *)counters + (i * counter_size); + fbc[i].counters = (void __percpu *)counters + i * counter_size; debug_percpu_counter_activate(&fbc[i]); } diff --git a/lib/percpu_test.c b/lib/percpu_test.c index 4a3d70bbc1a0..ce7124b16dab 100644 --- a/lib/percpu_test.c +++ b/lib/percpu_test.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/limits.h> #include <linux/module.h> /* validate @native and @pcp counter values match @expected */ @@ -24,8 +25,9 @@ static int __init percpu_test_init(void) * +ul_one/-ul_one below would replace with inc/dec instructions. */ volatile unsigned int ui_one = 1; - long l = 0; + unsigned long long ull = 0; unsigned long ul = 0; + long l = 0; pr_info("percpu test start\n"); @@ -112,6 +114,13 @@ static int __init percpu_test_init(void) CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, ULONG_MAX); + ul = ull = 0; + __this_cpu_write(ulong_counter, 0); + + ul = ull += UINT_MAX; + __this_cpu_add(ulong_counter, ull); + CHECK(ul, ulong_counter, UINT_MAX); + ul = 3; __this_cpu_write(ulong_counter, 3); diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c index 54e1809a38fd..6264e2013f25 100644 --- a/lib/pldmfw/pldmfw.c +++ b/lib/pldmfw/pldmfw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2018-2019, Intel Corporation. 
*/ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/crc32.h> #include <linux/device.h> #include <linux/firmware.h> @@ -481,9 +481,17 @@ static int pldm_parse_components(struct pldmfw_priv *data) component->component_data = data->fw->data + offset; component->component_size = size; + if (data->context->mode == PLDMFW_UPDATE_MODE_SINGLE_COMPONENT && + data->context->component_identifier != component->identifier) + continue; + list_add_tail(&component->entry, &data->components); } + if (data->context->mode == PLDMFW_UPDATE_MODE_SINGLE_COMPONENT && + list_empty(&data->components)) + return -ENOENT; + header_crc_ptr = data->fw->data + data->offset; err = pldm_move_fw_offset(data, sizeof(data->header_crc)); diff --git a/lib/plist.c b/lib/plist.c index 0d86ed7a76ac..c6bce1226874 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -47,8 +47,8 @@ static void plist_check_list(struct list_head *top) plist_check_prev_next(top, prev, next); while (next != top) { - prev = next; - next = prev->next; + WRITE_ONCE(prev, next); + WRITE_ONCE(next, prev->next); plist_check_prev_next(top, prev, next); } } @@ -72,7 +72,7 @@ static void plist_check_head(struct plist_head *head) */ void plist_add(struct plist_node *node, struct plist_head *head) { - struct plist_node *first, *iter, *prev = NULL; + struct plist_node *first, *iter, *prev = NULL, *last, *reverse_iter; struct list_head *node_next = &head->node_list; plist_check_head(head); @@ -83,16 +83,26 @@ void plist_add(struct plist_node *node, struct plist_head *head) goto ins_node; first = iter = plist_first(head); + last = reverse_iter = list_entry(first->prio_list.prev, struct plist_node, prio_list); do { if (node->prio < iter->prio) { node_next = &iter->node_list; break; + } else if (node->prio >= reverse_iter->prio) { + prev = reverse_iter; + iter = list_entry(reverse_iter->prio_list.next, + struct plist_node, prio_list); + if (likely(reverse_iter != last)) + node_next = &iter->node_list; + break; } prev = iter; iter = list_entry(iter->prio_list.next, struct plist_node, prio_list); + reverse_iter = list_entry(reverse_iter->prio_list.prev, + struct plist_node, prio_list); } while (iter != first); if (!prev || prev->prio != node->prio) @@ -255,6 +265,32 @@ static int __init plist_test(void) } printk(KERN_DEBUG "end plist test\n"); + + /* Worst case test for plist_add() */ + unsigned int test_data[241]; + + for (i = 0; i < ARRAY_SIZE(test_data); i++) + test_data[i] = i; + + ktime_t start, end, time_elapsed = 0; + + plist_head_init(&test_head); + + for (i = 0; i < ARRAY_SIZE(test_node); i++) { + plist_node_init(test_node + i, 0); + test_node[i].prio = test_data[i]; + } + + for (i = 0; i < ARRAY_SIZE(test_node); i++) { + if (plist_node_empty(test_node + i)) { + start = ktime_get(); + plist_add(test_node + i, &test_head); + end = ktime_get(); + time_elapsed += (end - start); + } + } + + pr_debug("plist_add worst case test time elapsed %lld\n", time_elapsed); return 0; } diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 385a94aa0b99..29127dd05d63 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -33,27 +33,8 @@ CFLAGS_REMOVE_vpermxor8.o += -msoft-float endif endif -# The GCC option -ffreestanding is required in order to compile code containing -# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) -ifeq ($(CONFIG_KERNEL_MODE_NEON),y) -NEON_FLAGS := -ffreestanding -# Enable <arm_neon.h> -NEON_FLAGS += -isystem $(shell $(CC) -print-file-name=include) -ifeq ($(ARCH),arm) -NEON_FLAGS += -march=armv7-a 
-mfloat-abi=softfp -mfpu=neon -endif -CFLAGS_recov_neon_inner.o += $(NEON_FLAGS) -ifeq ($(ARCH),arm64) -CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only -endif -endif - quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ + cmd_unroll = $(AWK) -v N=$* -f $(src)/unroll.awk < $< > $@ targets += int1.c int2.c int4.c int8.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE @@ -75,10 +56,16 @@ targets += vpermxor1.c vpermxor2.c vpermxor4.c vpermxor8.c $(obj)/vpermxor%.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) -CFLAGS_neon1.o += $(NEON_FLAGS) -CFLAGS_neon2.o += $(NEON_FLAGS) -CFLAGS_neon4.o += $(NEON_FLAGS) -CFLAGS_neon8.o += $(NEON_FLAGS) +CFLAGS_neon1.o += $(CC_FLAGS_FPU) +CFLAGS_neon2.o += $(CC_FLAGS_FPU) +CFLAGS_neon4.o += $(CC_FLAGS_FPU) +CFLAGS_neon8.o += $(CC_FLAGS_FPU) +CFLAGS_recov_neon_inner.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_recov_neon_inner.o += $(CC_FLAGS_NO_FPU) targets += neon1.c neon2.c neon4.c neon8.c $(obj)/neon%.c: $(src)/neon.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) diff --git a/lib/random32.c b/lib/random32.c index 32060b852668..24e7acd9343f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -36,11 +36,11 @@ #include <linux/percpu.h> #include <linux/export.h> #include <linux/jiffies.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/slab.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /** * prandom_u32_state - seeded pseudo-random number generator. 
diff --git a/lib/rbtree.c b/lib/rbtree.c index 5114eda6309c..989c2d615f92 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -297,9 +297,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N S --> N sl * / \ \ - * sl Sr S + * sl sr S * \ - * Sr + * sr * * Note: p might be red, and then both * p and sl are red after rotation(which @@ -312,9 +312,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N sl --> P S * \ / \ - * S N Sr + * S N sr * \ - * Sr + * sr */ tmp1 = tmp2->rb_right; WRITE_ONCE(sibling->rb_left, tmp1); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 41ae3c7570d3..8655a76d29a1 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/rbtree_augmented.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6ae2ba8e06a2..3e555d012ed6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -130,7 +130,8 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, if (ntbl) return ntbl; - ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); + ntbl = alloc_hooks_tag(ht->alloc_tag, + kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO)); if (ntbl && leaf) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) @@ -157,7 +158,8 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, size = sizeof(*tbl) + sizeof(tbl->buckets[0]); - tbl = kzalloc(size, gfp); + tbl = alloc_hooks_tag(ht->alloc_tag, + kmalloc_noprof(size, gfp|__GFP_ZERO)); if (!tbl) return NULL; @@ -181,11 +183,13 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, int i; static struct lock_class_key __key; - tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); + tbl = alloc_hooks_tag(ht->alloc_tag, + kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), + gfp|__GFP_ZERO, NUMA_NO_NODE)); size = nbuckets; - if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) { + if (tbl == NULL && !gfpflags_allow_blocking(gfp)) { tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); nbuckets = 0; } @@ -580,10 +584,6 @@ static struct bucket_table *rhashtable_insert_one( */ rht_assign_locked(bkt, obj); - atomic_inc(&ht->nelems); - if (rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); - return NULL; } @@ -611,15 +611,23 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { + bool inserted; + flags = rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, hash, obj, data); + inserted = data && !new_tbl; + if (inserted) + atomic_inc(&ht->nelems); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); rht_unlock(tbl, bkt, flags); + + if (inserted && rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); } } while (!IS_ERR_OR_NULL(new_tbl)); @@ -661,7 +669,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * structure outside the hash table. * * This function may be called from any process context, including - * non-preemptable context, but cannot be called from softirq or + * non-preemptible context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. 
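The context rules restated above for rhashtable_walk_enter() are easiest to see in a complete walk; a minimal sketch (the table and element type names are hypothetical):

	struct rhashtable_iter iter;
	struct my_obj *obj;				/* hypothetical element type */

	rhashtable_walk_enter(&my_table, &iter);	/* any process context */
	rhashtable_walk_start(&iter);			/* enters RCU read-side section */
	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;		/* table resized, keep walking */
			break;
		}
		/* inspect obj */
	}
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);			/* mandatory pairing */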
@@ -1016,7 +1024,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * .obj_hashfn = my_hash_fn, * }; */ -int rhashtable_init(struct rhashtable *ht, +int rhashtable_init_noprof(struct rhashtable *ht, const struct rhashtable_params *params) { struct bucket_table *tbl; @@ -1031,6 +1039,8 @@ int rhashtable_init(struct rhashtable *ht, spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); + alloc_tag_record(ht->alloc_tag); + if (params->min_size) ht->p.min_size = roundup_pow_of_two(params->min_size); @@ -1076,7 +1086,7 @@ int rhashtable_init(struct rhashtable *ht, return 0; } -EXPORT_SYMBOL_GPL(rhashtable_init); +EXPORT_SYMBOL_GPL(rhashtable_init_noprof); /** * rhltable_init - initialize a new hash list table @@ -1087,15 +1097,15 @@ EXPORT_SYMBOL_GPL(rhashtable_init); * * See documentation for rhashtable_init. */ -int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params) { int err; - err = rhashtable_init(&hlt->ht, params); + err = rhashtable_init_noprof(&hlt->ht, params); hlt->ht.rhlist = true; return err; } -EXPORT_SYMBOL_GPL(rhltable_init); +EXPORT_SYMBOL_GPL(rhltable_init_noprof); static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, void (*free_fn)(void *ptr, void *arg), diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 1e453f825c05..d3412984170c 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -60,12 +60,30 @@ static inline void update_alloc_hint_after_get(struct sbitmap *sb, /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, + unsigned int depth, unsigned int alloc_hint, bool wrap) { - unsigned long mask; + unsigned long mask, word_mask; - if (!READ_ONCE(map->cleared)) - return false; + guard(raw_spinlock_irqsave)(&map->swap_lock); + + if (!map->cleared) { + if (depth == 0) + return false; + + word_mask = (~0UL) >> (BITS_PER_LONG - depth); + /* + * The current behavior is to always retry after moving + * ->cleared to word, and we change it to retry in case + * of any free bits. To avoid an infinite loop, we need + * to take wrap & alloc_hint into account, otherwise a + * soft lockup may occur. + */ + if (!wrap && alloc_hint) + word_mask &= ~((1UL << alloc_hint) - 1); + + return (READ_ONCE(map->word) & word_mask) != word_mask; + } /* * First get a stable cleared mask, setting the old mask to 0. 
@@ -85,6 +103,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, bool alloc_hint) { unsigned int bits_per_word; + int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); @@ -116,6 +135,9 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, return -ENOMEM; } + for (i = 0; i < sb->map_nr; i++) + raw_spin_lock_init(&sb->map[i].swap_lock); + return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); @@ -126,7 +148,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(&sb->map[i]); + sbitmap_deferred_clear(&sb->map[i], 0, 0, 0); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); @@ -179,7 +201,7 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, alloc_hint, wrap); if (nr != -1) break; - if (!sbitmap_deferred_clear(map)) + if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap)) break; } while (1); @@ -496,7 +518,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int map_depth = __map_depth(sb, index); unsigned long val; - sbitmap_deferred_clear(map); + sbitmap_deferred_clear(map, 0, 0, 0); val = READ_ONCE(map->word); if (val == (1UL << (map_depth - 1)) - 1) goto next; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7bc2220fea80..5bb6b8aff232 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -11,6 +11,7 @@ #include <linux/kmemleak.h> #include <linux/bvec.h> #include <linux/uio.h> +#include <linux/folio_queue.h> /** * sg_next - return the next scatterlist entry in a list @@ -473,14 +474,14 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, return -EOPNOTSUPP; if (sgt_append->prv) { - unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) + - sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE; + unsigned long next_pfn; if (WARN_ON(offset)) return -EINVAL; /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; + next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE; if (page_to_pfn(pages[0]) == next_pfn) { last_pg = pfn_to_page(next_pfn - 1); while (n_pages && pages_are_mergeable(pages[0], last_pg)) { @@ -1262,6 +1263,67 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, } /* + * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to + * the scatterlist. The pages are not pinned. 
+ */ +static ssize_t extract_folioq_to_sg(struct iov_iter *iter, + ssize_t maxsize, + struct sg_table *sgtable, + unsigned int sg_max, + iov_iter_extraction_t extraction_flags) +{ + const struct folio_queue *folioq = iter->folioq; + struct scatterlist *sg = sgtable->sgl + sgtable->nents; + unsigned int slot = iter->folioq_slot; + ssize_t ret = 0; + size_t offset = iter->iov_offset; + + BUG_ON(!folioq); + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + if (WARN_ON_ONCE(!folioq)) + return 0; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t fsize = folioq_folio_size(folioq, slot); + + if (offset < fsize) { + size_t part = umin(maxsize - ret, fsize - offset); + + sg_set_page(sg, folio_page(folio, 0), part, offset); + sgtable->nents++; + sg++; + sg_max--; + offset += part; + ret += part; + } + + if (offset >= fsize) { + offset = 0; + slot++; + if (slot >= folioq_nr_slots(folioq)) { + if (!folioq->next) { + WARN_ON_ONCE(ret < iter->count); + break; + } + folioq = folioq->next; + slot = 0; + } + } + } while (sg_max > 0 && ret < maxsize); + + iter->folioq = folioq; + iter->folioq_slot = slot; + iter->iov_offset = offset; + iter->count -= ret; + return ret; +} + +/* * Extract up to sg_max folios from an XARRAY-type iterator and add them to * the scatterlist. The pages are not pinned. */ @@ -1323,8 +1385,8 @@ static ssize_t extract_xarray_to_sg(struct iov_iter *iter, * addition of @sg_max elements. * * The pages referred to by UBUF- and IOVEC-type iterators are extracted and - * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE- - * and DISCARD-type are not supported. + * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't + * pinned; DISCARD-type is not supported. * * No end mark is placed on the scatterlist; that's left to the caller. * @@ -1356,6 +1418,9 @@ ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, case ITER_KVEC: return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max, extraction_flags); + case ITER_FOLIOQ: + return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max, + extraction_flags); case ITER_XARRAY: return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max, extraction_flags); diff --git a/lib/siphash.c b/lib/siphash.c index 15bc5b6f368c..9e4e88752d2e 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -10,7 +10,7 @@ */ #include <linux/siphash.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 #include <linux/dcache.h> diff --git a/lib/siphash_kunit.c b/lib/siphash_kunit.c index a3c697e8be35..26bd4e8dc03e 100644 --- a/lib/siphash_kunit.c +++ b/lib/siphash_kunit.c @@ -194,4 +194,5 @@ static struct kunit_suite siphash_test_suite = { kunit_test_suite(siphash_test_suite); MODULE_AUTHOR("Jason A. 
Donenfeld <Jason@zx2c4.com>"); +MODULE_DESCRIPTION("Test cases for siphash.c"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 4ce960438806..f11691315c2f 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/rcupdate.h> #include "../mm/slab.h" static struct kunit_resource resource; @@ -140,7 +141,7 @@ static void test_kmalloc_redzone_access(struct kunit *test) { struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32, SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); - u8 *p = kmalloc_trace(s, GFP_KERNEL, 18); + u8 *p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 18)); kasan_disable_current(); @@ -157,6 +158,81 @@ static void test_kmalloc_redzone_access(struct kunit *test) kmem_cache_destroy(s); } +struct test_kfree_rcu_struct { + struct rcu_head rcu; +}; + +static void test_kfree_rcu(struct kunit *test) +{ + struct kmem_cache *s; + struct test_kfree_rcu_struct *p; + + if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST)) + kunit_skip(test, "can't do kfree_rcu() when test is built-in"); + + s = test_kmem_cache_create("TestSlub_kfree_rcu", + sizeof(struct test_kfree_rcu_struct), + SLAB_NO_MERGE); + p = kmem_cache_alloc(s, GFP_KERNEL); + + kfree_rcu(p, rcu); + kmem_cache_destroy(s); + + KUNIT_EXPECT_EQ(test, 0, slab_errors); +} + +static void test_leak_destroy(struct kunit *test) +{ + struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy", + 64, SLAB_NO_MERGE); + kmem_cache_alloc(s, GFP_KERNEL); + + kmem_cache_destroy(s); + + KUNIT_EXPECT_EQ(test, 2, slab_errors); +} + +static void test_krealloc_redzone_zeroing(struct kunit *test) +{ + u8 *p; + int i; + struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64, + SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); + + p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48)); + memset(p, 0xff, 48); + + kasan_disable_current(); + OPTIMIZER_HIDE_VAR(p); + + /* Test shrink */ + p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + /* Test grow within the same 64B kmalloc object */ + p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO); + for (i = 40; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + for (i = 56; i < 64; i++) + KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE); + + validate_slab_cache(s); + KUNIT_EXPECT_EQ(test, 0, slab_errors); + + memset(p, 0xff, 56); + /* Test grow with allocating a bigger 128B object */ + p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO); + for (i = 0; i < 56; i++) + KUNIT_EXPECT_EQ(test, p[i], 0xff); + for (i = 56; i < 112; i++) + KUNIT_EXPECT_EQ(test, p[i], 0); + + kfree(p); + kasan_enable_current(); + kmem_cache_destroy(s); +} + static int test_init(struct kunit *test) { slab_errors = 0; @@ -177,6 +253,9 @@ static struct kunit_case test_cases[] = { KUNIT_CASE(test_clobber_redzone_free), KUNIT_CASE(test_kmalloc_redzone_access), + KUNIT_CASE(test_kfree_rcu), + KUNIT_CASE(test_leak_destroy), + KUNIT_CASE(test_krealloc_redzone_zeroing), {} }; diff --git a/lib/sort.c b/lib/sort.c index a0509088f82a..8e73dc55476b 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -5,13 +5,11 @@ * This performs n*log2(n) + 0.37*n + o(n) comparisons on average, * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case. 
* - * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n + * Quicksort manages n*log2(n) - 1.26*n for random inputs (1.63*n * better) at the expense of stack usage and much larger code to avoid * quicksort's O(n^2) worst case. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/types.h> #include <linux/export.h> #include <linux/sort.h> @@ -202,6 +200,13 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) * copy (e.g. fix up pointers or auxiliary data), but the built-in swap * avoids a slow retpoline and so is significantly faster. * + * The comparison function must adhere to specific mathematical + * properties to ensure correct and stable sorting: + * - Antisymmetry: cmp_func(a, b) must return the opposite sign of + * cmp_func(b, a). + * - Transitivity: if cmp_func(a, b) <= 0 and cmp_func(b, c) <= 0, then + * cmp_func(a, c) <= 0. + * * Sorting time is O(n log n) both on average and worst-case. While * quicksort is slightly faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make @@ -252,10 +257,7 @@ void sort_r(void *base, size_t num, size_t size, a = size << shift; n -= size; do_swap(base + a, base + n, size, swap_func, priv); - } else if (n > size) { /* Sorting: Extract root */ - n -= size; - do_swap(base, base + n, size, swap_func, priv); - } else { /* Sort complete */ + } else { /* Sort complete */ break; } @@ -285,6 +287,11 @@ void sort_r(void *base, size_t num, size_t size, do_swap(base + b, base + c, size, swap_func, priv); } } + + n -= size; + do_swap(base, base + n, size, swap_func, priv); + if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0) + do_swap(base, base + size, size, swap_func, priv); } EXPORT_SYMBOL(sort_r); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cd8f23455285..245d5b416699 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -624,20 +624,21 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, * we won't be able to do that under the lock. */ if (unlikely(can_alloc && !READ_ONCE(new_pool))) { - /* - * Zero out zone modifiers, as we don't have specific zone - * requirements. Keep the flags related to allocation in atomic - * contexts, I/O, nolockdep. - */ - alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); - alloc_flags |= __GFP_NOWARN; - page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); + page = alloc_pages(gfp_nested_mask(alloc_flags), + DEPOT_POOL_ORDER); if (page) prealloc = page_address(page); } - raw_spin_lock_irqsave(&pool_lock, flags); + if (in_nmi()) { + /* We can never allocate in NMI context. */ + WARN_ON_ONCE(can_alloc); + /* Best effort; bail if we fail to take the lock. */ + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + goto exit; + } else { + raw_spin_lock_irqsave(&pool_lock, flags); + } printk_deferred_enter(); /* Try to find again, to avoid concurrently inserting duplicates. 
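Returning to the cmp_func() contract documented for sort_r()/sort() earlier in this lib/sort.c hunk, a callback that satisfies both properties (sketch; it also avoids the classic "return a - b" overflow pitfall):

	static int cmp_int(const void *a, const void *b)
	{
		int x = *(const int *)a;
		int y = *(const int *)b;

		/* Antisymmetric and transitive by construction. */
		return (x > y) - (x < y);
	}

	/* ... */
	sort(values, nr, sizeof(*values), cmp_int, NULL);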
*/ diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index 3bc14d1ee816..135322592faf 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -47,10 +47,12 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define DO_NOTHING_TYPE_SCALAR(var_type) var_type #define DO_NOTHING_TYPE_STRING(var_type) void #define DO_NOTHING_TYPE_STRUCT(var_type) void +#define DO_NOTHING_TYPE_UNION(var_type) void #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) #define DO_NOTHING_RETURN_STRING(ptr) /**/ #define DO_NOTHING_RETURN_STRUCT(ptr) /**/ +#define DO_NOTHING_RETURN_UNION(ptr) /**/ #define DO_NOTHING_CALL_SCALAR(var, name) \ (var) = do_nothing_ ## name(&(var)) @@ -58,10 +60,13 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, do_nothing_ ## name(var) #define DO_NOTHING_CALL_STRUCT(var, name) \ do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_UNION(var, name) \ + do_nothing_ ## name(&(var)) #define FETCH_ARG_SCALAR(var) &var #define FETCH_ARG_STRING(var) var #define FETCH_ARG_STRUCT(var) &var +#define FETCH_ARG_UNION(var) &var /* * On m68k, if the leaf function test variable is longer than 8 bytes, @@ -70,13 +75,16 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, */ #ifdef CONFIG_M68K #define FILL_SIZE_STRING 8 +#define FILL_SIZE_ARRAY 2 #else #define FILL_SIZE_STRING 16 +#define FILL_SIZE_ARRAY 8 #endif #define INIT_CLONE_SCALAR /**/ #define INIT_CLONE_STRING [FILL_SIZE_STRING] #define INIT_CLONE_STRUCT /**/ +#define INIT_CLONE_UNION /**/ #define ZERO_CLONE_SCALAR(zero) memset(&(zero), 0x00, sizeof(zero)) #define ZERO_CLONE_STRING(zero) memset(&(zero), 0x00, sizeof(zero)) @@ -92,6 +100,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, zero.three = 0; \ zero.four = 0; \ } while (0) +#define ZERO_CLONE_UNION(zero) ZERO_CLONE_STRUCT(zero) #define INIT_SCALAR_none(var_type) /**/ #define INIT_SCALAR_zero(var_type) = 0 @@ -101,6 +110,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_none(var_type) /**/ #define INIT_STRUCT_zero(var_type) = { } +#define INIT_STRUCT_old_zero(var_type) = { 0 } #define __static_partial { .two = 0, } @@ -146,6 +156,34 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_assigned_copy(var_type) \ ; var = *(arg) +/* Union initialization is the same as structs. 
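For readers decoding the initializer macros, the two zeroing spellings that the new old_zero cases distinguish are simply (sketch, using a union declared further down in this file):

	union test_small_start a = { };		/* "zero": empty initializer list   */
	union test_small_start b = { 0 };	/* "old_zero": zero the first member */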
*/ +#define INIT_UNION_none(var_type) INIT_STRUCT_none(var_type) +#define INIT_UNION_zero(var_type) INIT_STRUCT_zero(var_type) +#define INIT_UNION_old_zero(var_type) INIT_STRUCT_old_zero(var_type) + +#define INIT_UNION_static_partial(var_type) \ + INIT_STRUCT_static_partial(var_type) +#define INIT_UNION_static_all(var_type) \ + INIT_STRUCT_static_all(var_type) +#define INIT_UNION_dynamic_partial(var_type) \ + INIT_STRUCT_dynamic_partial(var_type) +#define INIT_UNION_dynamic_all(var_type) \ + INIT_STRUCT_dynamic_all(var_type) +#define INIT_UNION_runtime_partial(var_type) \ + INIT_STRUCT_runtime_partial(var_type) +#define INIT_UNION_runtime_all(var_type) \ + INIT_STRUCT_runtime_all(var_type) +#define INIT_UNION_assigned_static_partial(var_type) \ + INIT_STRUCT_assigned_static_partial(var_type) +#define INIT_UNION_assigned_static_all(var_type) \ + INIT_STRUCT_assigned_static_all(var_type) +#define INIT_UNION_assigned_dynamic_partial(var_type) \ + INIT_STRUCT_assigned_dynamic_partial(var_type) +#define INIT_UNION_assigned_dynamic_all(var_type) \ + INIT_STRUCT_assigned_dynamic_all(var_type) +#define INIT_UNION_assigned_copy(var_type) \ + INIT_STRUCT_assigned_copy(var_type) + /* * @name: unique string name for the test * @var_type: type to be tested for zeroing initialization @@ -212,6 +250,7 @@ static noinline void test_ ## name (struct kunit *test) \ static noinline DO_NOTHING_TYPE_ ## which(var_type) \ do_nothing_ ## name(var_type *ptr) \ { \ + OPTIMIZER_HIDE_VAR(ptr); \ /* Will always be true, but compiler doesn't know. */ \ if ((unsigned long)ptr > 0x2) \ return DO_NOTHING_RETURN_ ## which(ptr); \ @@ -293,6 +332,33 @@ struct test_user { unsigned long four; }; +/* No padding: all members are the same size. */ +union test_same_sizes { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_start { + char one:1; + char two; + short three; + unsigned long four; + struct big_struct { + unsigned long array[FILL_SIZE_ARRAY]; + } big; +}; + +/* Mismatched sizes, with three and four being small */ +union test_small_end { + short one; + unsigned long two; + char three:1; + char four; +}; + #define ALWAYS_PASS WANT_SUCCESS #define ALWAYS_FAIL XFAIL @@ -331,6 +397,11 @@ struct test_user { struct test_ ## name, STRUCT, init, \ xfail) +#define DEFINE_UNION_TEST(name, init, xfail) \ + DEFINE_TEST(name ## _ ## init, \ + union test_ ## name, STRUCT, init, \ + xfail) + #define DEFINE_STRUCT_TESTS(init, xfail) \ DEFINE_STRUCT_TEST(small_hole, init, xfail); \ DEFINE_STRUCT_TEST(big_hole, init, xfail); \ @@ -342,9 +413,22 @@ struct test_user { xfail); \ DEFINE_STRUCT_TESTS(base ## _ ## all, xfail) +#define DEFINE_UNION_INITIALIZER_TESTS(base, xfail) \ + DEFINE_UNION_TESTS(base ## _ ## partial, \ + xfail); \ + DEFINE_UNION_TESTS(base ## _ ## all, xfail) + +#define DEFINE_UNION_TESTS(init, xfail) \ + DEFINE_UNION_TEST(same_sizes, init, xfail); \ + DEFINE_UNION_TEST(small_start, init, xfail); \ + DEFINE_UNION_TEST(small_end, init, xfail); + /* These should be fully initialized all the time! */ DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS); DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS); +DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(old_zero, ALWAYS_PASS); /* Struct initializers: padding may be left uninitialized. 
*/ DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS); @@ -352,6 +436,12 @@ DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL); +DEFINE_UNION_INITIALIZER_TESTS(static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(dynamic, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(runtime, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); +DEFINE_UNION_TESTS(assigned_copy, ALWAYS_FAIL); /* No initialization without compiler instrumentation. */ DEFINE_SCALAR_TESTS(none, STRONG_PASS); DEFINE_STRUCT_TESTS(none, BYREF_PASS); @@ -435,13 +525,23 @@ DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL); KUNIT_CASE(test_trailing_hole_ ## init),\ KUNIT_CASE(test_packed_ ## init) \ +#define KUNIT_test_unions(init) \ + KUNIT_CASE(test_same_sizes_ ## init), \ + KUNIT_CASE(test_small_start_ ## init), \ + KUNIT_CASE(test_small_end_ ## init) \ + static struct kunit_case stackinit_test_cases[] = { /* These are explicitly initialized and should always pass. */ KUNIT_test_scalars(zero), KUNIT_test_structs(zero), + KUNIT_test_structs(old_zero), + KUNIT_test_unions(zero), + KUNIT_test_unions(old_zero), /* Padding here appears to be accidentally always initialized? */ KUNIT_test_structs(dynamic_partial), KUNIT_test_structs(assigned_dynamic_partial), + KUNIT_test_unions(dynamic_partial), + KUNIT_test_unions(assigned_dynamic_partial), /* Padding initialization depends on compiler behaviors. */ KUNIT_test_structs(static_partial), KUNIT_test_structs(static_all), @@ -451,8 +551,17 @@ static struct kunit_case stackinit_test_cases[] = { KUNIT_test_structs(assigned_static_partial), KUNIT_test_structs(assigned_static_all), KUNIT_test_structs(assigned_dynamic_all), + KUNIT_test_unions(static_partial), + KUNIT_test_unions(static_all), + KUNIT_test_unions(dynamic_all), + KUNIT_test_unions(runtime_partial), + KUNIT_test_unions(runtime_all), + KUNIT_test_unions(assigned_static_partial), + KUNIT_test_unions(assigned_static_all), + KUNIT_test_unions(assigned_dynamic_all), /* Everything fails this since it effectively performs a memcpy(). */ KUNIT_test_structs(assigned_copy), + KUNIT_test_unions(assigned_copy), /* STRUCTLEAK_BYREF_ALL should cover everything from here down. 
*/ KUNIT_test_scalars(none), KUNIT_CASE(test_switch_1_none), @@ -471,4 +580,5 @@ static struct kunit_suite stackinit_test_suite = { kunit_test_suites(&stackinit_test_suite); +MODULE_DESCRIPTION("Test cases for compiler-based stack variable zeroing"); MODULE_LICENSE("GPL"); diff --git a/lib/string.c b/lib/string.c index 966da44bfc86..eb4486ed40d2 100644 --- a/lib/string.c +++ b/lib/string.c @@ -27,7 +27,7 @@ #include <asm/page.h> #include <asm/rwonce.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/word-at-a-time.h> #ifndef __HAVE_ARCH_STRNCASECMP @@ -104,6 +104,12 @@ char *strncpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strncpy); #endif +#ifdef __BIG_ENDIAN +# define ALLBUTLAST_BYTE_MASK (~255ul) +#else +# define ALLBUTLAST_BYTE_MASK (~0ul >> 8) +#endif + ssize_t sized_strscpy(char *dest, const char *src, size_t count) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -147,13 +153,18 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) *(unsigned long *)(dest+res) = c & zero_bytemask(data); return res + find_zero(data); } + count -= sizeof(unsigned long); + if (unlikely(!count)) { + c &= ALLBUTLAST_BYTE_MASK; + *(unsigned long *)(dest+res) = c; + return -E2BIG; + } *(unsigned long *)(dest+res) = c; res += sizeof(unsigned long); - count -= sizeof(unsigned long); max -= sizeof(unsigned long); } - while (count) { + while (count > 1) { char c; c = src[res]; @@ -164,11 +175,11 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) count--; } - /* Hit buffer length without finding a NUL; force NUL-termination. */ - if (res) - dest[res-1] = '\0'; + /* Force NUL-termination. */ + dest[res] = '\0'; - return -E2BIG; + /* Return E2BIG if the source didn't stop */ + return src[res] ? 
-E2BIG : res; } EXPORT_SYMBOL(sized_strscpy); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 69ba49b853c7..91fa37b5c510 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -57,7 +57,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, static const unsigned int rounding[] = { 500, 50, 5 }; int i = 0, j; u32 remainder = 0, sf_cap; - char tmp[8]; + char tmp[12]; const char *unit; tmp[0] = '\0'; @@ -321,6 +321,9 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) { char *out = dst; + if (!size) + size = SIZE_MAX; + while (*src && --size) { if (src[0] == '\\' && src[1] != '\0' && size > 1) { src++; diff --git a/lib/string_helpers_kunit.c b/lib/string_helpers_kunit.c index f88e39fd68d6..c853046183d2 100644 --- a/lib/string_helpers_kunit.c +++ b/lib/string_helpers_kunit.c @@ -625,4 +625,5 @@ static struct kunit_suite string_helpers_test_suite = { kunit_test_suites(&string_helpers_test_suite); +MODULE_DESCRIPTION("Test cases for string helpers module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/string_kunit.c b/lib/string_kunit.c index 2a812decf14b..c919e3293da6 100644 --- a/lib/string_kunit.c +++ b/lib/string_kunit.c @@ -633,4 +633,5 @@ static struct kunit_suite string_test_suite = { kunit_test_suites(&string_test_suite); +MODULE_DESCRIPTION("Test cases for string functions"); MODULE_LICENSE("GPL v2"); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 6432b8c3e431..6dc234913dd5 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -120,6 +120,18 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (unlikely(count <= 0)) return 0; + kasan_check_write(dst, count); + check_object_size(dst, count, false); + + if (can_do_masked_user_access()) { + long retval; + + src = masked_user_access_begin(src); + retval = do_strncpy_from_user(dst, src, count, count); + user_read_access_end(); + return retval; + } + max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(src); if (likely(src_addr < max_addr)) { @@ -133,8 +145,6 @@ long strncpy_from_user(char *dst, const char __user *src, long count) if (max > count) max = count; - kasan_check_write(dst, count); - check_object_size(dst, count, false); if (user_read_access_begin(src, max)) { retval = do_strncpy_from_user(dst, src, count, max); user_read_access_end(); diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index feeb935a2299..6e489f9e90f1 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -96,6 +96,15 @@ long strnlen_user(const char __user *str, long count) if (unlikely(count <= 0)) return 0; + if (can_do_masked_user_access()) { + long retval; + + str = masked_user_access_begin(str); + retval = do_strnlen_user(str, count, count); + user_read_access_end(); + return retval; + } + max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(str); if (likely(src_addr < max_addr)) { diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index f355f67169b6..ee87fef66cb5 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -732,4 +732,5 @@ static int __init test_kstrtox_init(void) return -EINVAL; } module_init(test_kstrtox_init); +MODULE_DESCRIPTION("Module test for kstrto*() APIs"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 83019beabce4..65a75d58ed9e 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -244,7 +244,7 @@ static void __init test_find_nth_bit(void) expect_eq_uint(60, find_nth_bit(bmap, 64 * 3, 5)); expect_eq_uint(80, 
find_nth_bit(bmap, 64 * 3, 6)); expect_eq_uint(123, find_nth_bit(bmap, 64 * 3, 7)); - expect_eq_uint(64 * 3, find_nth_bit(bmap, 64 * 3, 8)); + expect_eq_uint(0, !!(find_nth_bit(bmap, 64 * 3, 8) < 64 * 3)); expect_eq_uint(10, find_nth_bit(bmap, 64 * 3 - 1, 0)); expect_eq_uint(20, find_nth_bit(bmap, 64 * 3 - 1, 1)); @@ -254,7 +254,7 @@ static void __init test_find_nth_bit(void) expect_eq_uint(60, find_nth_bit(bmap, 64 * 3 - 1, 5)); expect_eq_uint(80, find_nth_bit(bmap, 64 * 3 - 1, 6)); expect_eq_uint(123, find_nth_bit(bmap, 64 * 3 - 1, 7)); - expect_eq_uint(64 * 3 - 1, find_nth_bit(bmap, 64 * 3 - 1, 8)); + expect_eq_uint(0, !!(find_nth_bit(bmap, 64 * 3 - 1, 8) < 64 * 3 - 1)); for_each_set_bit(bit, exp1, EXP1_IN_BITS) { b = find_nth_bit(exp1, EXP1_IN_BITS, cnt++); @@ -1486,4 +1486,5 @@ static void __init selftest(void) KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny <david.decotigny@googlers.com>"); +MODULE_DESCRIPTION("Test cases for bitmap API"); MODULE_LICENSE("GPL"); diff --git a/lib/test_bitops.c b/lib/test_bitops.c index 3b7bcbee84db..55669624bb28 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -5,9 +5,11 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/cleanup.h> #include <linux/init.h> #include <linux/module.h> #include <linux/printk.h> +#include <linux/slab.h> /* a tiny module only meant to test * @@ -50,6 +52,30 @@ static unsigned long order_comb_long[][2] = { }; #endif +static int __init test_fns(void) +{ + static volatile __always_used unsigned long tmp __initdata; + unsigned long *buf __free(kfree) = NULL; + unsigned int i, n; + ktime_t time; + + buf = kmalloc_array(10000, sizeof(unsigned long), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + get_random_bytes(buf, 10000 * sizeof(unsigned long)); + time = ktime_get(); + + for (n = 0; n < BITS_PER_LONG; n++) + for (i = 0; i < 10000; i++) + tmp = fns(buf[i], n); + + time = ktime_get() - time; + pr_err("fns: %18llu ns\n", time); + + return 0; +} + static int __init test_bitops_startup(void) { int i, bit_set; @@ -94,6 +120,8 @@ static int __init test_bitops_startup(void) if (bit_set != BITOPS_LAST) pr_err("ERROR: FOUND SET BIT %d\n", bit_set); + test_fns(); + pr_info("Completed bitops test\n"); return 0; diff --git a/lib/test_bits.c b/lib/test_bits.c index c9368a2314e7..c7b38d91e1f1 100644 --- a/lib/test_bits.c +++ b/lib/test_bits.c @@ -39,6 +39,36 @@ static void genmask_ull_test(struct kunit *test) #endif } +static void genmask_u128_test(struct kunit *test) +{ +#ifdef CONFIG_ARCH_SUPPORTS_INT128 + /* Below 64 bit masks */ + KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(0, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, GENMASK_U128(1, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000006ull, GENMASK_U128(2, 1)); + KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(31, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_U128(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(63, 0)); + + /* Above 64 bit masks - only 64 bit portion can be validated once */ + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(64, 0) >> 1); + KUNIT_EXPECT_EQ(test, 0x00000000ffffffffull, GENMASK_U128(81, 50) >> 50); + KUNIT_EXPECT_EQ(test, 0x0000000000ffffffull, GENMASK_U128(87, 64) >> 64); + KUNIT_EXPECT_EQ(test, 0x0000000000ff0000ull, GENMASK_U128(87, 80) >> 64); + + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_U128(127, 0) >> 64); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, (u64)GENMASK_U128(127, 0)); + KUNIT_EXPECT_EQ(test, 0x0000000000000003ull, 
GENMASK_U128(127, 126) >> 126); + KUNIT_EXPECT_EQ(test, 0x0000000000000001ull, GENMASK_U128(127, 127) >> 127); +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK_U128(0, 1); + GENMASK_U128(0, 10); + GENMASK_U128(9, 10); +#endif /* TEST_GENMASK_FAILURES */ +#endif /* CONFIG_ARCH_SUPPORTS_INT128 */ +} + static void genmask_input_check_test(struct kunit *test) { unsigned int x, y; @@ -56,12 +86,16 @@ static void genmask_input_check_test(struct kunit *test) /* Valid input */ KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1)); KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(100, 80)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(110, 65)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(127, 0)); } static struct kunit_case bits_test_cases[] = { KUNIT_CASE(genmask_test), KUNIT_CASE(genmask_ull_test), + KUNIT_CASE(genmask_u128_test), KUNIT_CASE(genmask_input_check_test), {} }; @@ -72,4 +106,5 @@ static struct kunit_suite bits_test_suite = { }; kunit_test_suite(bits_test_suite); +MODULE_DESCRIPTION("Test cases for functions and macros in bits.h"); MODULE_LICENSE("GPL"); diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c index f247089d63c0..ec290ac2a0d9 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/test_blackhole_dev.c @@ -96,4 +96,5 @@ module_init(test_blackholedev_init); module_exit(test_blackholedev_exit); MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); +MODULE_DESCRIPTION("module test of the blackhole_dev"); MODULE_LICENSE("GPL"); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 207ff87194db..af0041df2b72 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14,7 +14,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/highmem.h> #include <linux/sched.h> @@ -478,7 +478,7 @@ static int __bpf_ld_imm64(struct bpf_insn insns[2], u8 reg, s64 imm64) * to overflow the field size of the native instruction, triggering * a branch conversion mechanism in some JITs. 
*/ -static int __bpf_fill_max_jmp(struct bpf_test *self, int jmp, int imm) +static int __bpf_fill_max_jmp(struct bpf_test *self, int jmp, int imm, bool alu32) { struct bpf_insn *insns; int len = S16_MAX + 5; @@ -501,7 +501,7 @@ static int __bpf_fill_max_jmp(struct bpf_test *self, int jmp, int imm) }; int op = ops[(i >> 1) % ARRAY_SIZE(ops)]; - if (i & 1) + if ((i & 1) || alu32) insns[i++] = BPF_ALU32_REG(op, R0, R1); else insns[i++] = BPF_ALU64_REG(op, R0, R1); @@ -516,27 +516,47 @@ static int __bpf_fill_max_jmp(struct bpf_test *self, int jmp, int imm) } /* Branch taken by runtime decision */ +static int bpf_fill_max_jmp_taken_32(struct bpf_test *self) +{ + return __bpf_fill_max_jmp(self, BPF_JEQ, 1, true); +} + static int bpf_fill_max_jmp_taken(struct bpf_test *self) { - return __bpf_fill_max_jmp(self, BPF_JEQ, 1); + return __bpf_fill_max_jmp(self, BPF_JEQ, 1, false); } /* Branch not taken by runtime decision */ +static int bpf_fill_max_jmp_not_taken_32(struct bpf_test *self) +{ + return __bpf_fill_max_jmp(self, BPF_JEQ, 0, true); +} + static int bpf_fill_max_jmp_not_taken(struct bpf_test *self) { - return __bpf_fill_max_jmp(self, BPF_JEQ, 0); + return __bpf_fill_max_jmp(self, BPF_JEQ, 0, false); } /* Branch always taken, known at JIT time */ +static int bpf_fill_max_jmp_always_taken_32(struct bpf_test *self) +{ + return __bpf_fill_max_jmp(self, BPF_JGE, 0, true); +} + static int bpf_fill_max_jmp_always_taken(struct bpf_test *self) { - return __bpf_fill_max_jmp(self, BPF_JGE, 0); + return __bpf_fill_max_jmp(self, BPF_JGE, 0, false); } /* Branch never taken, known at JIT time */ +static int bpf_fill_max_jmp_never_taken_32(struct bpf_test *self) +{ + return __bpf_fill_max_jmp(self, BPF_JLT, 0, true); +} + static int bpf_fill_max_jmp_never_taken(struct bpf_test *self) { - return __bpf_fill_max_jmp(self, BPF_JLT, 0); + return __bpf_fill_max_jmp(self, BPF_JLT, 0, false); } /* ALU result computation used in tests */ @@ -1740,7 +1760,7 @@ static int __bpf_emit_cmpxchg32(struct bpf_test *self, void *arg, /* Result unsuccessful */ insns[i++] = BPF_STX_MEM(BPF_W, R10, R1, -4); insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ + insns[i++] = BPF_ZEXT_REG(R0); /* Zext always inserted by verifier */ insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); insns[i++] = BPF_JMP32_REG(BPF_JEQ, R1, R3, 2); @@ -1754,7 +1774,7 @@ static int __bpf_emit_cmpxchg32(struct bpf_test *self, void *arg, /* Result successful */ i += __bpf_ld_imm64(&insns[i], R0, dst); insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ + insns[i++] = BPF_ZEXT_REG(R0); /* Zext always inserted by verifier */ insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); insns[i++] = BPF_JMP32_REG(BPF_JEQ, R2, R3, 2); @@ -14234,6 +14254,38 @@ static struct bpf_test tests[] = { }, /* Conditional branch conversions */ { + "Long conditional jump: taken at runtime (32 bits)", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_max_jmp_taken_32, + }, + { + "Long conditional jump: not taken at runtime (32 bits)", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 2 } }, + .fill_helper = bpf_fill_max_jmp_not_taken_32, + }, + { + "Long conditional jump: always taken, known at JIT time (32 bits)", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_max_jmp_always_taken_32, + }, + { + "Long conditional jump: never taken, known at JIT time (32 bits)", + 
{ }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 2 } }, + .fill_helper = bpf_fill_max_jmp_never_taken_32, + }, + { "Long conditional jump: taken at runtime", { }, INTERNAL | FLAG_NO_DATA, @@ -15077,8 +15129,7 @@ static struct skb_segment_test skb_segment_tests[] __initconst = { .build_skb = build_test_skb_linear_no_head_frag, .features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO | - NETIF_F_LLTX | NETIF_F_GRO | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | + NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_STAG_TX } }; @@ -15198,6 +15249,7 @@ struct tail_call_test { int flags; int result; int stack_depth; + bool has_tail_call; }; /* Flags that can be passed to tail call test cases */ @@ -15273,6 +15325,7 @@ static struct tail_call_test tail_call_tests[] = { BPF_EXIT_INSN(), }, .result = 3, + .has_tail_call = true, }, { "Tail call 3", @@ -15283,6 +15336,7 @@ static struct tail_call_test tail_call_tests[] = { BPF_EXIT_INSN(), }, .result = 6, + .has_tail_call = true, }, { "Tail call 4", @@ -15293,6 +15347,7 @@ static struct tail_call_test tail_call_tests[] = { BPF_EXIT_INSN(), }, .result = 10, + .has_tail_call = true, }, { "Tail call load/store leaf", @@ -15323,6 +15378,7 @@ static struct tail_call_test tail_call_tests[] = { }, .result = 0, .stack_depth = 16, + .has_tail_call = true, }, { "Tail call error path, max count reached", @@ -15335,6 +15391,7 @@ static struct tail_call_test tail_call_tests[] = { }, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, + .has_tail_call = true, }, { "Tail call count preserved across function calls", @@ -15357,6 +15414,7 @@ static struct tail_call_test tail_call_tests[] = { .stack_depth = 8, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, + .has_tail_call = true, }, { "Tail call error path, NULL target", @@ -15369,6 +15427,7 @@ static struct tail_call_test tail_call_tests[] = { }, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, .result = MAX_TESTRUNS, + .has_tail_call = true, }, { "Tail call error path, index out of range", @@ -15381,6 +15440,7 @@ static struct tail_call_test tail_call_tests[] = { }, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, .result = MAX_TESTRUNS, + .has_tail_call = true, }, }; @@ -15430,6 +15490,7 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs) fp->len = len; fp->type = BPF_PROG_TYPE_SOCKET_FILTER; fp->aux->stack_depth = test->stack_depth; + fp->aux->tail_call_reachable = test->has_tail_call; memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn)); /* Relocate runtime tail call offsets and addresses */ @@ -15706,4 +15767,5 @@ static void __exit test_bpf_exit(void) module_init(test_bpf_init); module_exit(test_bpf_exit); +MODULE_DESCRIPTION("Testsuite for BPF interpreter and BPF JIT compiler"); MODULE_LICENSE("GPL"); diff --git a/lib/test_dynamic_debug.c b/lib/test_dynamic_debug.c index 8dd250ad022b..77c2a669b6af 100644 --- a/lib/test_dynamic_debug.c +++ b/lib/test_dynamic_debug.c @@ -162,4 +162,5 @@ module_init(test_dynamic_debug_init); module_exit(test_dynamic_debug_exit); MODULE_AUTHOR("Jim Cromie <jim.cromie@gmail.com>"); +MODULE_DESCRIPTION("Kernel module for testing dynamic_debug"); MODULE_LICENSE("GPL"); diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 9cfdcd6d21db..211222e63328 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -27,7 +27,7 @@ #include <linux/vmalloc.h> #include <linux/efi_embedded_fw.h> -MODULE_IMPORT_NS(TEST_FIRMWARE); 
+MODULE_IMPORT_NS("TEST_FIRMWARE"); #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 @@ -1567,4 +1567,5 @@ static void __exit test_firmware_exit(void) module_exit(test_firmware_exit); MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_DESCRIPTION("interface to trigger and test firmware loading"); MODULE_LICENSE("GPL"); diff --git a/lib/test_fortify/.gitignore b/lib/test_fortify/.gitignore new file mode 100644 index 000000000000..c1ba37d14b50 --- /dev/null +++ b/lib/test_fortify/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*.log diff --git a/lib/test_fortify/Makefile b/lib/test_fortify/Makefile new file mode 100644 index 000000000000..1c3f82ad8bb2 --- /dev/null +++ b/lib/test_fortify/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y := $(call cc-disable-warning,fortify-source) + +quiet_cmd_test_fortify = TEST $@ + cmd_test_fortify = $(CONFIG_SHELL) $(src)/test_fortify.sh \ + $< $@ "$(NM)" $(CC) $(c_flags) -DKBUILD_EXTRA_WARN1 + +$(obj)/%.log: $(src)/%.c $(src)/test_fortify.sh FORCE + $(call if_changed_dep,test_fortify) + +logs = $(patsubst $(src)/%.c, %.log, $(wildcard $(src)/*-*.c)) +targets += $(logs) + +quiet_cmd_gen_fortify_log = CAT $@ + cmd_gen_fortify_log = cat $(or $(real-prereqs),/dev/null) > $@ + +$(obj)/test_fortify.log: $(addprefix $(obj)/, $(logs)) FORCE + $(call if_changed,gen_fortify_log) + +# GCC<=7 does not always produce *.d files. +# Run the tests only for GCC>=8 or Clang. +always-$(call gcc-min-version, 80000) += test_fortify.log +always-$(CONFIG_CC_IS_CLANG) += test_fortify.log + +# Some architectures define __NO_FORTIFY if __SANITIZE_ADDRESS__ is undefined. +# Pass CFLAGS_KASAN to avoid warnings. +KASAN_SANITIZE := y diff --git a/lib/test_fortify/test_fortify.sh b/lib/test_fortify/test_fortify.sh new file mode 100644 index 000000000000..c2688ab8281d --- /dev/null +++ b/lib/test_fortify/test_fortify.sh @@ -0,0 +1,66 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only +set -e + +# Argument 1: Source file to build. +IN="$1" +shift +# Extract just the filename for error messages below. +FILE="${IN##*/}" +# Extract the function name for error messages below. +FUNC="${FILE#*-}" +FUNC="${FUNC%%-*}" +FUNC="${FUNC%%.*}" +# Extract the symbol to test for in build/symbol test below. +WANT="__${FILE%%-*}" + +# Argument 2: Where to write the build log. +OUT="$1" +shift +TMP="${OUT}.tmp" + +# Argument 3: Path to "nm" tool. +NM="$1" +shift + +# Remaining arguments are: $(CC) $(c_flags) + +# Clean up temporary file at exit. +__cleanup() { + rm -f "$TMP" +} +trap __cleanup EXIT + +# Function names in warnings are wrapped in backticks under UTF-8 locales. +# Run the commands with LANG=C so that grep output will not change. +export LANG=C + +status= +# Attempt to build a source that is expected to fail with a specific warning. +if "$@" -Werror -c "$IN" -o "$OUT".o 2> "$TMP" ; then + # If the build succeeds, either the test has failed or the + # warning may only happen at link time (Clang). In that case, + # make sure the expected symbol is unresolved in the symbol list. + # If so, FORTIFY is working for this case. + if ! $NM -A "$OUT".o | grep -m1 "\bU ${WANT}$" >>"$TMP" ; then + status="warning: unsafe ${FUNC}() usage lacked '$WANT' symbol in $IN" + fi +else + # If the build failed, check for the warning in the stderr. 
+ # GCC: + # ./include/linux/fortify-string.h:316:25: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] + # Clang 14: + # ./include/linux/fortify-string.h:316:4: error: call to __write_overflow_field declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] + if ! grep -Eq -m1 "error: call to .?\b${WANT}\b.?" "$TMP" ; then + status="warning: unsafe ${FUNC}() usage lacked '$WANT' warning in $IN" + fi +fi + +if [ -n "$status" ]; then + # Report on failure results, including compilation warnings. + echo "$status" | tee "$OUT" >&2 +else + # Report on good results, and save any compilation output to log. + echo "ok: unsafe ${FUNC}() usage correctly detected with '$WANT' in $IN" >"$OUT" +fi +cat "$TMP" >>"$OUT" diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c index 24de0e5ff859..cf92111b5c79 100644 --- a/lib/test_fprobe.c +++ b/lib/test_fprobe.c @@ -17,10 +17,8 @@ static u32 rand1, entry_val, exit_val; /* Use indirect calls to avoid inlining the target functions */ static u32 (*target)(u32 value); static u32 (*target2)(u32 value); -static u32 (*target_nest)(u32 value, u32 (*nest)(u32)); static unsigned long target_ip; static unsigned long target2_ip; -static unsigned long target_nest_ip; static int entry_return_value; static noinline u32 fprobe_selftest_target(u32 value) @@ -33,14 +31,9 @@ static noinline u32 fprobe_selftest_target2(u32 value) return (value / div_factor) + 1; } -static noinline u32 fprobe_selftest_nest_target(u32 value, u32 (*nest)(u32)) -{ - return nest(value + 2); -} - static notrace int fp_entry_handler(struct fprobe *fp, unsigned long ip, unsigned long ret_ip, - struct pt_regs *regs, void *data) + struct ftrace_regs *fregs, void *data) { KUNIT_EXPECT_FALSE(current_test, preemptible()); /* This can be called on the fprobe_selftest_target and the fprobe_selftest_target2 */ @@ -59,9 +52,9 @@ static notrace int fp_entry_handler(struct fprobe *fp, unsigned long ip, static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip, unsigned long ret_ip, - struct pt_regs *regs, void *data) + struct ftrace_regs *fregs, void *data) { - unsigned long ret = regs_return_value(regs); + unsigned long ret = ftrace_regs_get_return_value(fregs); KUNIT_EXPECT_FALSE(current_test, preemptible()); if (ip != target_ip) { @@ -79,22 +72,6 @@ static notrace void fp_exit_handler(struct fprobe *fp, unsigned long ip, KUNIT_EXPECT_NULL(current_test, data); } -static notrace int nest_entry_handler(struct fprobe *fp, unsigned long ip, - unsigned long ret_ip, - struct pt_regs *regs, void *data) -{ - KUNIT_EXPECT_FALSE(current_test, preemptible()); - return 0; -} - -static notrace void nest_exit_handler(struct fprobe *fp, unsigned long ip, - unsigned long ret_ip, - struct pt_regs *regs, void *data) -{ - KUNIT_EXPECT_FALSE(current_test, preemptible()); - KUNIT_EXPECT_EQ(current_test, ip, target_nest_ip); -} - /* Test entry only (no rethook) */ static void test_fprobe_entry(struct kunit *test) { @@ -191,25 +168,6 @@ static void test_fprobe_data(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp)); } -/* Test nr_maxactive */ -static void test_fprobe_nest(struct kunit *test) -{ - static const char *syms[] = {"fprobe_selftest_target", "fprobe_selftest_nest_target"}; - struct fprobe fp = { - .entry_handler = nest_entry_handler, - .exit_handler = nest_exit_handler, - 
.nr_maxactive = 1, - }; - - current_test = test; - KUNIT_EXPECT_EQ(test, 0, register_fprobe_syms(&fp, syms, 2)); - - target_nest(rand1, target); - KUNIT_EXPECT_EQ(test, 1, fp.nmissed); - - KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp)); -} - static void test_fprobe_skip(struct kunit *test) { struct fprobe fp = { @@ -247,10 +205,8 @@ static int fprobe_test_init(struct kunit *test) rand1 = get_random_u32_above(div_factor); target = fprobe_selftest_target; target2 = fprobe_selftest_target2; - target_nest = fprobe_selftest_nest_target; target_ip = get_ftrace_location(target); target2_ip = get_ftrace_location(target2); - target_nest_ip = get_ftrace_location(target_nest); return 0; } @@ -260,7 +216,6 @@ static struct kunit_case fprobe_testcases[] = { KUNIT_CASE(test_fprobe), KUNIT_CASE(test_fprobe_syms), KUNIT_CASE(test_fprobe_data), - KUNIT_CASE(test_fprobe_nest), KUNIT_CASE(test_fprobe_skip), {} }; diff --git a/lib/test_fpu.h b/lib/test_fpu.h new file mode 100644 index 000000000000..4459807084bc --- /dev/null +++ b/lib/test_fpu.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _LIB_TEST_FPU_H +#define _LIB_TEST_FPU_H + +int test_fpu(void); + +#endif diff --git a/lib/test_fpu.c b/lib/test_fpu_glue.c index e82db19fed84..c0596426370a 100644 --- a/lib/test_fpu.c +++ b/lib/test_fpu_glue.c @@ -17,39 +17,9 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/debugfs.h> -#include <asm/fpu/api.h> +#include <linux/fpu.h> -static int test_fpu(void) -{ - /* - * This sequence of operations tests that rounding mode is - * to nearest and that denormal numbers are supported. - * Volatile variables are used to avoid compiler optimizing - * the calculations away. - */ - volatile double a, b, c, d, e, f, g; - - a = 4.0; - b = 1e-15; - c = 1e-310; - - /* Sets precision flag */ - d = a + b; - - /* Result depends on rounding mode */ - e = a + b / 2; - - /* Denormal and very large values */ - f = b / c; - - /* Depends on denormal support */ - g = a + c * f; - - if (d > a && e > a && g > a) - return 0; - else - return -EINVAL; -} +#include "test_fpu.h" static int test_fpu_get(void *data, u64 *val) { @@ -68,8 +38,11 @@ static struct dentry *selftest_dir; static int __init test_fpu_init(void) { + if (!kernel_fpu_available()) + return -EINVAL; + selftest_dir = debugfs_create_dir("selftest_helpers", NULL); - if (!selftest_dir) + if (IS_ERR(selftest_dir)) return -ENOMEM; debugfs_create_file_unsafe("test_fpu", 0444, selftest_dir, NULL, @@ -86,4 +59,5 @@ static void __exit test_fpu_exit(void) module_init(test_fpu_init); module_exit(test_fpu_exit); +MODULE_DESCRIPTION("Test cases for floating point operations"); MODULE_LICENSE("GPL"); diff --git a/lib/test_fpu_impl.c b/lib/test_fpu_impl.c new file mode 100644 index 000000000000..777894dbbe86 --- /dev/null +++ b/lib/test_fpu_impl.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/errno.h> + +#include "test_fpu.h" + +int test_fpu(void) +{ + /* + * This sequence of operations tests that rounding mode is + * to nearest and that denormal numbers are supported. + * Volatile variables are used to avoid compiler optimizing + * the calculations away. 
+ */ + volatile double a, b, c, d, e, f, g; + + a = 4.0; + b = 1e-15; + c = 1e-310; + + /* Sets precision flag */ + d = a + b; + + /* Result depends on rounding mode */ + e = a + b / 2; + + /* Denormal and very large values */ + f = b / c; + + /* Depends on denormal support */ + g = a + c * f; + + if (d > a && e > a && g > a) + return 0; + else + return -EINVAL; +} diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c index 9ebf6f5549f3..48952364c540 100644 --- a/lib/test_free_pages.c +++ b/lib/test_free_pages.c @@ -44,4 +44,5 @@ static void m_ex(void) module_init(m_in); module_exit(m_ex); MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>"); +MODULE_DESCRIPTION("Check that free_pages() doesn't leak memory"); MODULE_LICENSE("GPL"); diff --git a/lib/test_hash.c b/lib/test_hash.c index bb25fda34794..a7af39662a0a 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -235,4 +235,5 @@ static struct kunit_suite hash_test_suite = { kunit_test_suite(hash_test_suite); +MODULE_DESCRIPTION("Test cases for <linux/hash.h> and <linux/stringhash.h>"); MODULE_LICENSE("GPL"); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index b916801f23a8..751645645988 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -113,7 +113,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, *p++ = ' '; } while (p < test + rs * 2 + rs / gs + 1); - strncpy(p, data_a, l); + memcpy(p, data_a, l); p += l; } @@ -253,4 +253,5 @@ static void __exit test_hexdump_exit(void) module_exit(test_hexdump_exit); MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>"); +MODULE_DESCRIPTION("Test cases for lib/hexdump.c module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 717dcb830127..056f2e411d7b 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -799,10 +799,7 @@ static int dmirror_exclusive(struct dmirror *dmirror, unsigned long mapped = 0; int i; - if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)) - next = end; - else - next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT); + next = min(end, addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)); ret = make_device_exclusive_range(mm, addr, next, pages, NULL); /* @@ -1226,8 +1223,8 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk) unsigned long *src_pfns; unsigned long *dst_pfns; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, start_pfn, npages); for (i = 0; i < npages; i++) { @@ -1250,8 +1247,8 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk) } migrate_device_pages(src_pfns, dst_pfns, npages); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); } /* Removes free pages from the free list so they can't be re-allocated */ @@ -1550,4 +1547,5 @@ static void __exit hmm_dmirror_exit(void) module_init(hmm_dmirror_init); module_exit(hmm_dmirror_exit); +MODULE_DESCRIPTION("HMM (Heterogeneous Memory Management) test module"); MODULE_LICENSE("GPL"); diff --git a/lib/test_ida.c b/lib/test_ida.c index 072a49897e71..c80155a1956d 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -214,4 +214,5 @@ static void ida_exit(void) module_init(ida_checks); module_exit(ida_exit); MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>"); +MODULE_DESCRIPTION("Test the 
IDA API"); MODULE_LICENSE("GPL"); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 1eec3b7ac67c..064ed0fce75a 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1223,4 +1223,5 @@ static void __exit test_kmod_exit(void) module_exit(test_kmod_exit); MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>"); +MODULE_DESCRIPTION("kmod stress test driver"); MODULE_LICENSE("GPL"); diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c index 0648f7154f5c..b7582010125c 100644 --- a/lib/test_kprobes.c +++ b/lib/test_kprobes.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * test_kprobes.c - simple sanity test for *probes + * test_kprobes.c - simple sanity test for k*probes * * Copyright IBM Corp. 2008 */ @@ -400,4 +400,5 @@ static struct kunit_suite kprobes_test_suite = { kunit_test_suites(&kprobes_test_suite); +MODULE_DESCRIPTION("simple sanity test for k*probes"); MODULE_LICENSE("GPL"); diff --git a/lib/test_linear_ranges.c b/lib/test_linear_ranges.c index c18f9c0f1f25..f482be00f1bc 100644 --- a/lib/test_linear_ranges.c +++ b/lib/test_linear_ranges.c @@ -216,4 +216,5 @@ static struct kunit_suite range_test_module = { kunit_test_suites(&range_test_module); +MODULE_DESCRIPTION("KUnit test for the linear_ranges helper"); MODULE_LICENSE("GPL"); diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index cc5f335f29b5..30879abc8a42 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -119,4 +119,5 @@ static struct kunit_suite list_sort_suite = { kunit_test_suites(&list_sort_suite); +MODULE_DESCRIPTION("list_sort() KUnit test suite"); MODULE_LICENSE("GPL"); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 399380db449c..13e2a10d7554 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1387,6 +1387,92 @@ static noinline void __init check_prev_entry(struct maple_tree *mt) mas_unlock(&mas); } +static noinline void __init check_store_null(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, ULONG_MAX); + + /* + * Store NULL at range [0, ULONG_MAX] to an empty tree should result + * in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at any range to an empty tree should result in an empty + * tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 3, 10); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a single entry tree should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, n] to a single entry tree should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); + mas_set_range(&mas, 0, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [m, n] where m > 0 to a single entry tree + * should still be a single entry tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set(&mas, 0); + mas_store_gfp(&mas, &mas, GFP_KERNEL); 
+ mas_set_range(&mas, 2, 5); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, mtree_empty(mt)); +// MT_BUG_ON(mt, xa_is_node(mas_root(&mas))); + mas_unlock(&mas); + mtree_destroy(mt); + + /* + * Store NULL at range [0, ULONG_MAX] to a tree with node should + * result in an empty tree + */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + mas_set_range(&mas, 1, 3); + mas_store_gfp(&mas, &mas, GFP_KERNEL); +// MT_BUG_ON(mt, !xa_is_node(mas_root(&mas))); + mas_set_range(&mas, 0, ULONG_MAX); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + MT_BUG_ON(mt, !mtree_empty(mt)); + mas_unlock(&mas); + mtree_destroy(mt); +} + static noinline void __init check_root_expand(struct maple_tree *mt) { MA_STATE(mas, mt, 0, 0); @@ -1477,6 +1563,30 @@ static noinline void __init check_root_expand(struct maple_tree *mt) mas_unlock(&mas); } +static noinline void __init check_deficient_node(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, 0); + int count; + + mas_lock(&mas); + for (count = 0; count < 10; count++) { + mas_set(&mas, count); + mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL); + } + + for (count = 20; count < 39; count++) { + mas_set(&mas, count); + mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL); + } + + for (count = 10; count < 12; count++) { + mas_set(&mas, count); + mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL); + } + mas_unlock(&mas); + mt_validate(mt); +} + static noinline void __init check_gap_combining(struct maple_tree *mt) { struct maple_enode *mn1, *mn2; @@ -3628,6 +3738,34 @@ static noinline void __init alloc_cyclic_testing(struct maple_tree *mt) } mtree_destroy(mt); + + /* + * Issue with reverse search was discovered + * https://lore.kernel.org/all/20241216060600.287B4C4CED0@smtp.kernel.org/ + * Exhausting the allocation area and forcing the search to wrap needs a + * mas_reset() in mas_alloc_cyclic(). + */ + next = 0; + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (int i = 0; i < 1023; i++) { + mtree_alloc_cyclic(mt, &location, mt, 2, 1024, &next, GFP_KERNEL); + MT_BUG_ON(mt, i != location - 2); + MT_BUG_ON(mt, i != next - 3); + MT_BUG_ON(mt, mtree_load(mt, location) != mt); + } + mtree_erase(mt, 123); + MT_BUG_ON(mt, mtree_load(mt, 123) != NULL); + mtree_alloc_cyclic(mt, &location, mt, 2, 1024, &next, GFP_KERNEL); + MT_BUG_ON(mt, 123 != location); + MT_BUG_ON(mt, 124 != next); + MT_BUG_ON(mt, mtree_load(mt, location) != mt); + mtree_erase(mt, 100); + mtree_alloc_cyclic(mt, &location, mt, 2, 1024, &next, GFP_KERNEL); + MT_BUG_ON(mt, 100 != location); + MT_BUG_ON(mt, 101 != next); + MT_BUG_ON(mt, mtree_load(mt, location) != mt); + mtree_destroy(mt); + /* Overflow test */ next = ULONG_MAX - 1; ret = mtree_alloc_cyclic(mt, &location, mt, 2, ULONG_MAX, &next, GFP_KERNEL); @@ -3711,6 +3849,14 @@ static int __init maple_tree_seed(void) #endif mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_deficient_node(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_store_null(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_root_expand(&tree); mtree_destroy(&tree); @@ -3946,4 +4092,5 @@ static void __exit maple_tree_harvest(void) module_init(maple_tree_seed); module_exit(maple_tree_harvest); MODULE_AUTHOR("Liam R. 
Howlett <Liam.Howlett@Oracle.com>"); +MODULE_DESCRIPTION("maple tree API test module"); MODULE_LICENSE("GPL"); diff --git a/lib/test_memcat_p.c b/lib/test_memcat_p.c index 849c477d49d0..7e0797a6bebf 100644 --- a/lib/test_memcat_p.c +++ b/lib/test_memcat_p.c @@ -112,4 +112,5 @@ static void __exit test_memcat_p_exit(void) module_init(test_memcat_p_init); module_exit(test_memcat_p_exit); +MODULE_DESCRIPTION("Test cases for memcat_p() in lib/memcat_p.c"); MODULE_LICENSE("GPL"); diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 0dc173849a54..6298f66c964b 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -436,4 +436,5 @@ static int __init test_meminit_init(void) } module_init(test_meminit_init); +MODULE_DESCRIPTION("Test cases for SL[AOU]B/page initialization at alloc/free time"); MODULE_LICENSE("GPL"); diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 7b01b4387cfb..a9c4a74d3898 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -11,26 +11,20 @@ #include <linux/printk.h> #include <linux/random.h> -static __init bool less_than(const void *lhs, const void *rhs) +DEFINE_MIN_HEAP(int, min_heap_test); + +static __init bool less_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs < *(int *)rhs; } -static __init bool greater_than(const void *lhs, const void *rhs) +static __init bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs > *(int *)rhs; } -static __init void swap_ints(void *lhs, void *rhs) -{ - int temp = *(int *)lhs; - - *(int *)lhs = *(int *)rhs; - *(int *)rhs = temp; -} - static __init int pop_verify_heap(bool min_heap, - struct min_heap *heap, + struct min_heap_test *heap, const struct min_heap_callbacks *funcs) { int *values = heap->data; @@ -38,7 +32,7 @@ static __init int pop_verify_heap(bool min_heap, int last; last = values[0]; - min_heap_pop(heap, funcs); + min_heap_pop_inline(heap, funcs, NULL); while (heap->nr > 0) { if (min_heap) { if (last > values[0]) { @@ -54,7 +48,7 @@ static __init int pop_verify_heap(bool min_heap, } } last = values[0]; - min_heap_pop(heap, funcs); + min_heap_pop_inline(heap, funcs, NULL); } return err; } @@ -63,20 +57,19 @@ static __init int test_heapify_all(bool min_heap) { int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = ARRAY_SIZE(values), .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, err; /* Test with known set of values. */ - min_heapify_all(&heap, &funcs); + min_heapify_all_inline(&heap, &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); @@ -85,7 +78,7 @@ static __init int test_heapify_all(bool min_heap) for (i = 0; i < heap.nr; i++) values[i] = get_random_u32(); - min_heapify_all(&heap, &funcs); + min_heapify_all_inline(&heap, &funcs, NULL); err += pop_verify_heap(min_heap, &heap, &funcs); return err; @@ -96,28 +89,27 @@ static __init int test_heap_push(bool min_heap) const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = 0, .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? 
less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; /* Test with known set of values copied from data. */ for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &data[i], &funcs); + min_heap_push_inline(&heap, &data[i], &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); /* Test with randomly generated values. */ while (heap.nr < heap.size) { temp = get_random_u32(); - min_heap_push(&heap, &temp, &funcs); + min_heap_push_inline(&heap, &temp, &funcs, NULL); } err += pop_verify_heap(min_heap, &heap, &funcs); @@ -129,43 +121,76 @@ static __init int test_heap_pop_push(bool min_heap) const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = 0, .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? less_than : greater_than, - .swp = swap_ints, + .swp = NULL, }; int i, temp, err; /* Fill values with data to pop and replace. */ temp = min_heap ? 0x80000000 : 0x7FFFFFFF; for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &temp, &funcs); + min_heap_push_inline(&heap, &temp, &funcs, NULL); /* Test with known set of values copied from data. */ for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_pop_push(&heap, &data[i], &funcs); + min_heap_pop_push_inline(&heap, &data[i], &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); heap.nr = 0; for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &temp, &funcs); + min_heap_push_inline(&heap, &temp, &funcs, NULL); /* Test with randomly generated values. */ for (i = 0; i < ARRAY_SIZE(data); i++) { temp = get_random_u32(); - min_heap_pop_push(&heap, &temp, &funcs); + min_heap_pop_push_inline(&heap, &temp, &funcs, NULL); } err += pop_verify_heap(min_heap, &heap, &funcs); return err; } +static __init int test_heap_del(bool min_heap) +{ + int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, + -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; + struct min_heap_test heap; + + min_heap_init_inline(&heap, values, ARRAY_SIZE(values)); + heap.nr = ARRAY_SIZE(values); + struct min_heap_callbacks funcs = { + .less = min_heap ? less_than : greater_than, + .swp = NULL, + }; + int i, err; + + /* Test with known set of values. */ + min_heapify_all_inline(&heap, &funcs, NULL); + for (i = 0; i < ARRAY_SIZE(values) / 2; i++) + min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); + err = pop_verify_heap(min_heap, &heap, &funcs); + + + /* Test with randomly generated values. 
*/ + heap.nr = ARRAY_SIZE(values); + for (i = 0; i < heap.nr; i++) + values[i] = get_random_u32(); + min_heapify_all_inline(&heap, &funcs, NULL); + + for (i = 0; i < ARRAY_SIZE(values) / 2; i++) + min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); + err += pop_verify_heap(min_heap, &heap, &funcs); + + return err; +} + static int __init test_min_heap_init(void) { int err = 0; @@ -176,6 +201,8 @@ static int __init test_min_heap_init(void) err += test_heap_push(false); err += test_heap_pop_push(true); err += test_heap_pop_push(false); + err += test_heap_del(true); + err += test_heap_del(false); if (err) { pr_err("test failed with %d errors\n", err); return -EINVAL; @@ -191,4 +218,5 @@ static void __exit test_min_heap_exit(void) } module_exit(test_min_heap_exit); +MODULE_DESCRIPTION("Test cases for the min max heap"); MODULE_LICENSE("GPL"); diff --git a/lib/test_module.c b/lib/test_module.c index debd19e35198..3d1b29b74807 100644 --- a/lib/test_module.c +++ b/lib/test_module.c @@ -31,4 +31,5 @@ static void __exit test_module_exit(void) module_exit(test_module_exit); MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_DESCRIPTION("module loading subsystem test module"); MODULE_LICENSE("GPL"); diff --git a/lib/test_objagg.c b/lib/test_objagg.c index c0c957c50635..d34df4306b87 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -60,7 +60,7 @@ static struct objagg_obj *world_obj_get(struct world *world, if (!world->key_refs[key_id_index(key_id)]) { world->objagg_objs[key_id_index(key_id)] = objagg_obj; } else if (world->objagg_objs[key_id_index(key_id)] != objagg_obj) { - pr_err("Key %u: God another object for the same key.\n", + pr_err("Key %u: Got another object for the same key.\n", key_id); err = -EINVAL; goto err_key_id_check; diff --git a/lib/test_objpool.c b/lib/test_objpool.c index bfdb81599832..896c0131c9a8 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -371,14 +371,10 @@ static int ot_start_sync(struct ot_test *test) if (!cpu_online(cpu)) continue; - work = kthread_create_on_node(ot_thread_worker, item, - cpu_to_node(cpu), "ot_worker_%d", cpu); - if (IS_ERR(work)) { + work = kthread_run_on_cpu(ot_thread_worker, item, + cpu, "ot_worker_%d"); + if (IS_ERR(work)) pr_err("failed to create thread for cpu %d\n", cpu); - } else { - kthread_bind(work, cpu); - wake_up_process(work); - } } /* wait a while to make sure all threads waiting at start line */ @@ -562,14 +558,9 @@ static int ot_start_async(struct ot_test *test) if (!cpu_online(cpu)) continue; - work = kthread_create_on_node(ot_thread_worker, item, - cpu_to_node(cpu), "ot_worker_%d", cpu); - if (IS_ERR(work)) { + work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d"); + if (IS_ERR(work)) pr_err("failed to create thread for cpu %d\n", cpu); - } else { - kthread_bind(work, cpu); - wake_up_process(work); - } } /* wait a while to make sure all threads waiting at start line */ @@ -687,4 +678,5 @@ static void __exit ot_mod_exit(void) module_init(ot_mod_init); module_exit(ot_mod_exit); -MODULE_LICENSE("GPL");
\ No newline at end of file +MODULE_DESCRIPTION("Test module for lockless object pool"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_parman.c b/lib/test_parman.c index 35e32243693c..f9b97426a337 100644 --- a/lib/test_parman.c +++ b/lib/test_parman.c @@ -39,7 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/err.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/parman.h> #define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */ diff --git a/lib/test_printf.c b/lib/test_printf.c index 69b6a5e177f2..59dbe4f9a4cb 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -386,6 +386,66 @@ kernel_ptr(void) static void __init struct_resource(void) { + struct resource test_resource = { + .start = 0xc0ffee00, + .end = 0xc0ffee00, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xc0ffee00 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xc0ffee, + .end = 0xba5eba11, + .flags = IORESOURCE_MEM, + }; + test("[mem 0x00c0ffee-0xba5eba11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xc0ffee, + .flags = IORESOURCE_MEM, + }; + test("[mem 0xba5eba11-0x00c0ffee flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba5eba11, + .end = 0xba5eca11, + .flags = IORESOURCE_MEM, + }; + + test("[mem 0xba5eba11-0xba5eca11 flags 0x200]", + "%pr", &test_resource); + + test_resource = (struct resource) { + .start = 0xba11, + .end = 0xca10, + .flags = IORESOURCE_IO | + IORESOURCE_DISABLED | + IORESOURCE_UNSET, + }; + + test("[io size 0x1000 disabled]", + "%pR", &test_resource); +} + +static void __init +struct_range(void) +{ + struct range test_range = DEFINE_RANGE(0xc0ffee00ba5eba11, + 0xc0ffee00ba5eba11); + test("[range 0xc0ffee00ba5eba11]", "%pra", &test_range); + + test_range = DEFINE_RANGE(0xc0ffee, 0xba5eba11); + test("[range 0x0000000000c0ffee-0x00000000ba5eba11]", + "%pra", &test_range); + + test_range = DEFINE_RANGE(0xba5eba11, 0xc0ffee); + test("[range 0x00000000ba5eba11-0x0000000000c0ffee]", + "%pra", &test_range); } static void __init @@ -641,26 +701,12 @@ page_flags_test(int section, int node, int zone, int last_cpupid, test(cmp_buf, "%pGp", &flags); } -static void __init page_type_test(unsigned int page_type, const char *name, - char *cmp_buf) -{ - unsigned long size; - - size = scnprintf(cmp_buf, BUF_SIZE, "%#x(", page_type); - if (page_type_has_type(page_type)) - size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s", name); - - snprintf(cmp_buf + size, BUF_SIZE - size, ")"); - test(cmp_buf, "%pGt", &page_type); -} - static void __init flags(void) { unsigned long flags; char *cmp_buffer; gfp_t gfp; - unsigned int page_type; cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); if (!cmp_buffer) @@ -700,18 +746,6 @@ flags(void) gfp |= __GFP_HIGH; test(cmp_buffer, "%pGg", &gfp); - page_type = ~0; - page_type_test(page_type, "", cmp_buffer); - - page_type = 10; - page_type_test(page_type, "", cmp_buffer); - - page_type = ~PG_buddy; - page_type_test(page_type, "buddy", cmp_buffer); - - page_type = ~(PG_table | PG_buddy); - page_type_test(page_type, "table|buddy", cmp_buffer); - kfree(cmp_buffer); } @@ -789,6 +823,7 @@ test_pointer(void) symbol_ptr(); kernel_ptr(); struct_resource(); + struct_range(); addr(); escaped_str(); hex_string(); @@ -824,4 +859,5 @@ static void __init selftest(void) KSTM_MODULE_LOADERS(test_printf); MODULE_AUTHOR("Rasmus Villemoes <linux@rasmusvillemoes.dk>"); 
+MODULE_DESCRIPTION("Test cases for printf facility"); MODULE_LICENSE("GPL"); diff --git a/lib/test_ref_tracker.c b/lib/test_ref_tracker.c index 49970a7c96f3..b983ceb12afc 100644 --- a/lib/test_ref_tracker.c +++ b/lib/test_ref_tracker.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Referrence tracker self test. + * Reference tracker self test. * * Copyright (c) 2021 Eric Dumazet <edumazet@google.com> */ @@ -112,4 +112,5 @@ static void __exit test_ref_tracker_exit(void) module_init(test_ref_tracker_init); module_exit(test_ref_tracker_exit); +MODULE_DESCRIPTION("Reference tracker self test"); MODULE_LICENSE("GPL v2"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 42b585208249..c63db03ebb9d 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -811,4 +811,5 @@ static void __exit test_rht_exit(void) module_init(test_rht_init); module_exit(test_rht_exit); +MODULE_DESCRIPTION("Resizable, Scalable, Concurrent Hash Table test module"); MODULE_LICENSE("GPL v2"); diff --git a/lib/test_scanf.c b/lib/test_scanf.c index a2707af2951a..44f8508c9d88 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/overflow.h> #include <linux/printk.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/string.h> @@ -810,4 +810,5 @@ static void __init selftest(void) KSTM_MODULE_LOADERS(test_scanf); MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>"); +MODULE_DESCRIPTION("Test cases for sscanf facility"); MODULE_LICENSE("GPL v2"); diff --git a/lib/test_sort.c b/lib/test_sort.c index be02e3a098cf..cd4a338d1153 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -29,7 +29,19 @@ static void test_sort(struct kunit *test) sort(a, TEST_LEN, sizeof(*a), cmpint, NULL); - for (i = 0; i < TEST_LEN-1; i++) + for (i = 0; i < TEST_LEN - 1; i++) + KUNIT_ASSERT_LE(test, a[i], a[i + 1]); + + r = 48; + + for (i = 0; i < TEST_LEN - 1; i++) { + r = (r * 725861) % 6599; + a[i] = r; + } + + sort(a, TEST_LEN - 1, sizeof(*a), cmpint, NULL); + + for (i = 0; i < TEST_LEN - 2; i++) KUNIT_ASSERT_LE(test, a[i], a[i + 1]); } @@ -45,4 +57,5 @@ static struct kunit_suite sort_test_suite = { kunit_test_suites(&sort_test_suite); +MODULE_DESCRIPTION("sort() KUnit test suite"); MODULE_LICENSE("GPL"); diff --git a/lib/test_static_key_base.c b/lib/test_static_key_base.c index 5089a2e2bdd8..9f507672afa5 100644 --- a/lib/test_static_key_base.c +++ b/lib/test_static_key_base.c @@ -57,4 +57,5 @@ module_init(test_static_key_base_init); module_exit(test_static_key_base_exit); MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>"); +MODULE_DESCRIPTION("Kernel module to support testing static keys"); MODULE_LICENSE("GPL"); diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c index 42daa74be029..00c715f30df9 100644 --- a/lib/test_static_keys.c +++ b/lib/test_static_keys.c @@ -236,4 +236,5 @@ module_init(test_static_key_init); module_exit(test_static_key_exit); MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>"); +MODULE_DESCRIPTION("Kernel module for testing static keys"); MODULE_LICENSE("GPL"); diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 9321d850931f..4249e0cc8aaf 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -71,7 +71,7 @@ static struct test_sysctl_data test_data = { }; /* These are all under /proc/sys/debug/test_sysctl/ */ -static struct ctl_table test_table[] = { +static const struct ctl_table test_table[] = { { .procname = "int_0001", .data = &test_data.int_0001, @@ -177,7 
+177,7 @@ static int test_sysctl_setup_node_tests(void) } /* Used to test that unregister actually removes the directory */ -static struct ctl_table test_table_unregister[] = { +static const struct ctl_table test_table_unregister[] = { { .procname = "unregister_error", .data = &test_data.int_0001, @@ -220,7 +220,7 @@ static int test_sysctl_run_register_mount_point(void) return 0; } -static struct ctl_table test_table_empty[] = { }; +static const struct ctl_table test_table_empty[] = { }; static int test_sysctl_run_register_empty(void) { @@ -280,4 +280,5 @@ static void __exit test_sysctl_exit(void) module_exit(test_sysctl_exit); MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>"); +MODULE_DESCRIPTION("proc sysctl test driver"); MODULE_LICENSE("GPL"); diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index c288df9372ed..5d7b10e98610 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -156,4 +156,5 @@ static void __exit test_ubsan_exit(void) module_exit(test_ubsan_exit); MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>"); +MODULE_DESCRIPTION("UBSAN unit test"); MODULE_LICENSE("GPL v2"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c deleted file mode 100644 index 5ff04d8fe971..000000000000 --- a/lib/test_user_copy.c +++ /dev/null @@ -1,331 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Kernel module for testing copy_to/from_user infrastructure. - * - * Copyright 2013 Google Inc. All Rights Reserved - * - * Authors: - * Kees Cook <keescook@chromium.org> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/mman.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> - -/* - * Several 32-bit architectures support 64-bit {get,put}_user() calls. - * As there doesn't appear to be anything that can safely determine - * their capability at compile-time, we just have to opt-out certain archs. - */ -#if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \ - !defined(CONFIG_M68K) && \ - !defined(CONFIG_MICROBLAZE) && \ - !defined(CONFIG_NIOS2) && \ - !defined(CONFIG_PPC32) && \ - !defined(CONFIG_SUPERH)) -# define TEST_U64 -#endif - -#define test(condition, msg, ...) \ -({ \ - int cond = (condition); \ - if (cond) \ - pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ - cond; \ -}) - -static bool is_zeroed(void *from, size_t size) -{ - return memchr_inv(from, 0x0, size) == NULL; -} - -static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) -{ - int ret = 0; - size_t start, end, i, zero_start, zero_end; - - if (test(size < 2 * PAGE_SIZE, "buffer too small")) - return -EINVAL; - - /* - * We want to cross a page boundary to exercise the code more - * effectively. We also don't want to make the size we scan too large, - * otherwise the test can take a long time and cause soft lockups. So - * scan a 1024 byte region across the page boundary. - */ - size = 1024; - start = PAGE_SIZE - (size / 2); - - kmem += start; - umem += start; - - zero_start = size / 4; - zero_end = size - zero_start; - - /* - * We conduct a series of check_nonzero_user() tests on a block of - * memory with the following byte-pattern (trying every possible - * [start,end] pair): - * - * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] - * - * And we verify that check_nonzero_user() acts identically to - * memchr_inv(). 
- */ - - memset(kmem, 0x0, size); - for (i = 1; i < zero_start; i += 2) - kmem[i] = 0xff; - for (i = zero_end; i < size; i += 2) - kmem[i] = 0xff; - - ret |= test(copy_to_user(umem, kmem, size), - "legitimate copy_to_user failed"); - - for (start = 0; start <= size; start++) { - for (end = start; end <= size; end++) { - size_t len = end - start; - int retval = check_zeroed_user(umem + start, len); - int expected = is_zeroed(kmem + start, len); - - ret |= test(retval != expected, - "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)", - retval, expected, start, end); - } - } - - return ret; -} - -static int test_copy_struct_from_user(char *kmem, char __user *umem, - size_t size) -{ - int ret = 0; - char *umem_src = NULL, *expected = NULL; - size_t ksize, usize; - - umem_src = kmalloc(size, GFP_KERNEL); - ret = test(umem_src == NULL, "kmalloc failed"); - if (ret) - goto out_free; - - expected = kmalloc(size, GFP_KERNEL); - ret = test(expected == NULL, "kmalloc failed"); - if (ret) - goto out_free; - - /* Fill umem with a fixed byte pattern. */ - memset(umem_src, 0x3e, size); - ret |= test(copy_to_user(umem, umem_src, size), - "legitimate copy_to_user failed"); - - /* Check basic case -- (usize == ksize). */ - ksize = size; - usize = size; - - memcpy(expected, umem_src, ksize); - - memset(kmem, 0x0, size); - ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), - "copy_struct_from_user(usize == ksize) failed"); - ret |= test(memcmp(kmem, expected, ksize), - "copy_struct_from_user(usize == ksize) gives unexpected copy"); - - /* Old userspace case -- (usize < ksize). */ - ksize = size; - usize = size / 2; - - memcpy(expected, umem_src, usize); - memset(expected + usize, 0x0, ksize - usize); - - memset(kmem, 0x0, size); - ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), - "copy_struct_from_user(usize < ksize) failed"); - ret |= test(memcmp(kmem, expected, ksize), - "copy_struct_from_user(usize < ksize) gives unexpected copy"); - - /* New userspace (-E2BIG) case -- (usize > ksize). */ - ksize = size / 2; - usize = size; - - memset(kmem, 0x0, size); - ret |= test(copy_struct_from_user(kmem, ksize, umem, usize) != -E2BIG, - "copy_struct_from_user(usize > ksize) didn't give E2BIG"); - - /* New userspace (success) case -- (usize > ksize). */ - ksize = size / 2; - usize = size; - - memcpy(expected, umem_src, ksize); - ret |= test(clear_user(umem + ksize, usize - ksize), - "legitimate clear_user failed"); - - memset(kmem, 0x0, size); - ret |= test(copy_struct_from_user(kmem, ksize, umem, usize), - "copy_struct_from_user(usize > ksize) failed"); - ret |= test(memcmp(kmem, expected, ksize), - "copy_struct_from_user(usize > ksize) gives unexpected copy"); - -out_free: - kfree(expected); - kfree(umem_src); - return ret; -} - -static int __init test_user_copy_init(void) -{ - int ret = 0; - char *kmem; - char __user *usermem; - char *bad_usermem; - unsigned long user_addr; - u8 val_u8; - u16 val_u16; - u32 val_u32; -#ifdef TEST_U64 - u64 val_u64; -#endif - - kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); - if (!kmem) - return -ENOMEM; - - user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_ANONYMOUS | MAP_PRIVATE, 0); - if (user_addr >= (unsigned long)(TASK_SIZE)) { - pr_warn("Failed to allocate user memory\n"); - kfree(kmem); - return -ENOMEM; - } - - usermem = (char __user *)user_addr; - bad_usermem = (char *)user_addr; - - /* - * Legitimate usage: none of these copies should fail. 
- */ - memset(kmem, 0x3a, PAGE_SIZE * 2); - ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), - "legitimate copy_to_user failed"); - memset(kmem, 0x0, PAGE_SIZE); - ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), - "legitimate copy_from_user failed"); - ret |= test(memcmp(kmem, kmem + PAGE_SIZE, PAGE_SIZE), - "legitimate usercopy failed to copy data"); - -#define test_legit(size, check) \ - do { \ - val_##size = check; \ - ret |= test(put_user(val_##size, (size __user *)usermem), \ - "legitimate put_user (" #size ") failed"); \ - val_##size = 0; \ - ret |= test(get_user(val_##size, (size __user *)usermem), \ - "legitimate get_user (" #size ") failed"); \ - ret |= test(val_##size != check, \ - "legitimate get_user (" #size ") failed to do copy"); \ - if (val_##size != check) { \ - pr_info("0x%llx != 0x%llx\n", \ - (unsigned long long)val_##size, \ - (unsigned long long)check); \ - } \ - } while (0) - - test_legit(u8, 0x5a); - test_legit(u16, 0x5a5b); - test_legit(u32, 0x5a5b5c5d); -#ifdef TEST_U64 - test_legit(u64, 0x5a5b5c5d6a6b6c6d); -#endif -#undef test_legit - - /* Test usage of check_nonzero_user(). */ - ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); - /* Test usage of copy_struct_from_user(). */ - ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); - - /* - * Invalid usage: none of these copies should succeed. - */ - - /* Prepare kernel memory with check values. */ - memset(kmem, 0x5a, PAGE_SIZE); - memset(kmem + PAGE_SIZE, 0, PAGE_SIZE); - - /* Reject kernel-to-kernel copies through copy_from_user(). */ - ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), - PAGE_SIZE), - "illegal all-kernel copy_from_user passed"); - - /* Destination half of buffer should have been zeroed. */ - ret |= test(memcmp(kmem + PAGE_SIZE, kmem, PAGE_SIZE), - "zeroing failure for illegal all-kernel copy_from_user"); - -#if 0 - /* - * When running with SMAP/PAN/etc, this will Oops the kernel - * due to the zeroing of userspace memory on failure. This needs - * to be tested in LKDTM instead, since this test module does not - * expect to explode. 
- */ - ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, - PAGE_SIZE), - "illegal reversed copy_from_user passed"); -#endif - ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, - PAGE_SIZE), - "illegal all-kernel copy_to_user passed"); - ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, - PAGE_SIZE), - "illegal reversed copy_to_user passed"); - -#define test_illegal(size, check) \ - do { \ - val_##size = (check); \ - ret |= test(!get_user(val_##size, (size __user *)kmem), \ - "illegal get_user (" #size ") passed"); \ - ret |= test(val_##size != (size)0, \ - "zeroing failure for illegal get_user (" #size ")"); \ - if (val_##size != (size)0) { \ - pr_info("0x%llx != 0\n", \ - (unsigned long long)val_##size); \ - } \ - ret |= test(!put_user(val_##size, (size __user *)kmem), \ - "illegal put_user (" #size ") passed"); \ - } while (0) - - test_illegal(u8, 0x5a); - test_illegal(u16, 0x5a5b); - test_illegal(u32, 0x5a5b5c5d); -#ifdef TEST_U64 - test_illegal(u64, 0x5a5b5c5d6a6b6c6d); -#endif -#undef test_illegal - - vm_munmap(user_addr, PAGE_SIZE * 2); - kfree(kmem); - - if (ret == 0) { - pr_info("tests passed.\n"); - return 0; - } - - return -EINVAL; -} - -module_init(test_user_copy_init); - -static void __exit test_user_copy_exit(void) -{ - pr_info("unloaded.\n"); -} - -module_exit(test_user_copy_exit); - -MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); -MODULE_LICENSE("GPL"); diff --git a/lib/test_uuid.c b/lib/test_uuid.c index cd819c397dc7..0124fad5d72c 100644 --- a/lib/test_uuid.c +++ b/lib/test_uuid.c @@ -130,4 +130,5 @@ static void __exit test_uuid_exit(void) module_exit(test_uuid_exit); MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>"); +MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 4ddf769861ff..f585949ff696 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -373,7 +373,7 @@ vm_map_ram_test(void) if (!pages) return -1; - nr_allocated = alloc_pages_bulk_array(GFP_KERNEL, map_nr_pages, pages); + nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages); if (nr_allocated != map_nr_pages) goto cleanup; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 928fc20337e6..6932a26f4927 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1448,6 +1448,41 @@ static noinline void check_pause(struct xarray *xa) XA_BUG_ON(xa, count != order_limit); xa_destroy(xa); + + index = 0; + for (order = XA_CHUNK_SHIFT; order > 0; order--) { + XA_BUG_ON(xa, xa_store_order(xa, index, order, + xa_mk_index(index), GFP_KERNEL)); + index += 1UL << order; + } + + index = 0; + count = 0; + xas_set(&xas, 0); + rcu_read_lock(); + xas_for_each(&xas, entry, ULONG_MAX) { + XA_BUG_ON(xa, entry != xa_mk_index(index)); + index += 1UL << (XA_CHUNK_SHIFT - count); + count++; + } + rcu_read_unlock(); + XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + + index = 0; + count = 0; + xas_set(&xas, XA_CHUNK_SIZE / 2 + 1); + rcu_read_lock(); + xas_for_each(&xas, entry, ULONG_MAX) { + XA_BUG_ON(xa, entry != xa_mk_index(index)); + index += 1UL << (XA_CHUNK_SHIFT - count); + count++; + xas_pause(&xas); + } + rcu_read_unlock(); + XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + + xa_destroy(xa); + } static noinline void check_move_tiny(struct xarray *xa) @@ -2001,6 +2036,97 @@ static noinline void check_get_order(struct xarray *xa) } } +static noinline void check_xas_get_order(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + unsigned int max_order = 
IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j; + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xas_set_order(&xas, i << order, order); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(i)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + for (j = i << order; j < (i + 1) << order; j++) { + xas_set_order(&xas, j, 0); + rcu_read_lock(); + xas_load(&xas); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + rcu_read_unlock(); + } + + xas_lock(&xas); + xas_set_order(&xas, i << order, order); + xas_store(&xas, NULL); + xas_unlock(&xas); + } + } +} + +static noinline void check_xas_conflict_get_order(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + void *entry; + int only_once; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j, k; + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xas_set_order(&xas, i << order, order); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(i)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + /* + * Ensure xas_get_order works with xas_for_each_conflict. + */ + j = i << order; + for (k = 0; k < order; k++) { + only_once = 0; + xas_set_order(&xas, j + (1 << k), k); + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + only_once++; + } + XA_BUG_ON(xa, only_once != 1); + xas_unlock(&xas); + } + + if (order < max_order - 1) { + only_once = 0; + xas_set_order(&xas, (i & ~1UL) << order, order + 1); + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, xas_get_order(&xas) != order); + only_once++; + } + XA_BUG_ON(xa, only_once != 1); + xas_unlock(&xas); + } + + xas_set_order(&xas, i << order, order); + xas_lock(&xas); + xas_store(&xas, NULL); + xas_unlock(&xas); + } + } +} + + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -2052,6 +2178,8 @@ static int xarray_checks(void) check_multi_store(&array); check_multi_store_advanced(&array); check_get_order(&array); + check_xas_get_order(&array); + check_xas_conflict_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); @@ -2080,4 +2208,5 @@ static void xarray_exit(void) module_init(xarray_checks); module_exit(xarray_exit); MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>"); +MODULE_DESCRIPTION("XArray API test module"); MODULE_LICENSE("GPL"); diff --git a/lib/tests/Makefile b/lib/tests/Makefile new file mode 100644 index 000000000000..8e4f42cb9c54 --- /dev/null +++ b/lib/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/module/.gitignore b/lib/tests/module/.gitignore new file mode 100644 index 000000000000..8be7891b250f --- /dev/null +++ b/lib/tests/module/.gitignore @@ -0,0 +1,4 @@ +test_kallsyms_a.c +test_kallsyms_b.c +test_kallsyms_c.c +test_kallsyms_d.c diff --git a/lib/tests/module/Makefile b/lib/tests/module/Makefile new file mode 100644 index 000000000000..2f3e1a772c2c --- /dev/null +++ b/lib/tests/module/Makefile @@ -0,0 +1,14 @@ +obj-$(CONFIG_TEST_KALLSYMS_A) += test_kallsyms_a.o +obj-$(CONFIG_TEST_KALLSYMS_B) += test_kallsyms_b.o +obj-$(CONFIG_TEST_KALLSYMS_C) += test_kallsyms_c.o +obj-$(CONFIG_TEST_KALLSYMS_D) += test_kallsyms_d.o + +quiet_cmd_gen_test_kallsyms = GEN $@ + cmd_gen_test_kallsyms = $< $@ \ + $(CONFIG_TEST_KALLSYMS_NUMSYMS) \ + 
$(CONFIG_TEST_KALLSYMS_SCALE_FACTOR) + +$(obj)/%.c: $(src)/gen_test_kallsyms.sh FORCE + $(call if_changed,gen_test_kallsyms) + +targets += $(foreach x, a b c d, test_kallsyms_$(x).c) diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh new file mode 100755 index 000000000000..561dcac0f359 --- /dev/null +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -0,0 +1,134 @@ +#!/bin/bash + +TARGET=$(basename $1) +DIR=lib/tests/module +TARGET="$DIR/$TARGET" +NUM_SYMS=$2 +SCALE_FACTOR=$3 +TEST_TYPE=$(echo $TARGET | sed -e 's|lib/tests/module/test_kallsyms_||g') +TEST_TYPE=$(echo $TEST_TYPE | sed -e 's|.c||g') +FIRST_B_LOOKUP=1 + +if [[ $NUM_SYMS -gt 2 ]]; then + FIRST_B_LOOKUP=$((NUM_SYMS/2)) +fi + +gen_template_module_header() +{ + cat <<____END_MODULE +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +/* + * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> + * + * Automatically generated code for testing, do not edit manually. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +____END_MODULE +} + +gen_num_syms() +{ + PREFIX=$1 + NUM=$2 + for i in $(seq 1 $NUM); do + printf "int auto_test_%s_%010d = 0;\n" $PREFIX $i + printf "EXPORT_SYMBOL_GPL(auto_test_%s_%010d);\n" $PREFIX $i + done + echo +} + +gen_template_module_data_a() +{ + gen_num_syms a $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_b() +{ + printf "\nextern int auto_test_a_%010d;\n\n" $FIRST_B_LOOKUP + echo "static int auto_runtime_test(void)" + echo "{" + printf "\nreturn auto_test_a_%010d;\n" $FIRST_B_LOOKUP + echo "}" +} + +gen_template_module_data_c() +{ + gen_num_syms c $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_d() +{ + gen_num_syms d $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_exit() +{ + cat <<____END_MODULE +static int __init auto_test_module_init(void) +{ + return auto_runtime_test(); +} +module_init(auto_test_module_init); + +static void __exit auto_test_module_exit(void) +{ +} +module_exit(auto_test_module_exit); + +MODULE_AUTHOR("Luis Chamberlain <mcgrof@kernel.org>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for kallsyms"); +____END_MODULE +} + +case $TEST_TYPE in + a) + gen_template_module_header > $TARGET + gen_template_module_data_a $NUM_SYMS >> $TARGET + gen_template_module_exit >> $TARGET + ;; + b) + gen_template_module_header > $TARGET + gen_template_module_data_b >> $TARGET + gen_template_module_exit >> $TARGET + ;; + c) + gen_template_module_header > $TARGET + gen_template_module_data_c $((NUM_SYMS * SCALE_FACTOR)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + d) + gen_template_module_header > $TARGET + gen_template_module_data_d $((NUM_SYMS * SCALE_FACTOR * 2)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + *) + ;; +esac diff --git a/lib/ts_bm.c b/lib/ts_bm.c index e5f30f9177df..eed5967238c5 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -216,6 +216,7 @@ static void __exit exit_bm(void) textsearch_unregister(&bm_ops); } +MODULE_DESCRIPTION("Boyer-Moore text search implementation"); MODULE_LICENSE("GPL"); module_init(init_bm); diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index 64fd9015ad80..053615f4fcd7 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -331,6 +331,7 @@ static void __exit exit_fsm(void) 
textsearch_unregister(&fsm_ops); } +MODULE_DESCRIPTION("naive finite state machine text search"); MODULE_LICENSE("GPL"); module_init(init_fsm); diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index c77a3d537f24..5520dc28255a 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -147,6 +147,7 @@ static void __exit exit_kmp(void) textsearch_unregister(&kmp_ops); } +MODULE_DESCRIPTION("Knuth-Morris-Pratt text search implementation"); MODULE_LICENSE("GPL"); module_init(init_kmp); diff --git a/lib/union_find.c b/lib/union_find.c new file mode 100644 index 000000000000..413b0f8adf7a --- /dev/null +++ b/lib/union_find.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/union_find.h> + +/** + * uf_find - Find the root of a node and perform path compression + * @node: the node to find the root of + * + * This function returns the root of the node by following the parent + * pointers. It also performs path compression, making the tree shallower. + * + * Returns the root node of the set containing node. + */ +struct uf_node *uf_find(struct uf_node *node) +{ + struct uf_node *parent; + + while (node->parent != node) { + parent = node->parent; + node->parent = parent->parent; + node = parent; + } + return node; +} + +/** + * uf_union - Merge two sets, using union by rank + * @node1: the first node + * @node2: the second node + * + * This function merges the sets containing node1 and node2, by comparing + * the ranks to keep the tree balanced. + */ +void uf_union(struct uf_node *node1, struct uf_node *node2) +{ + struct uf_node *root1 = uf_find(node1); + struct uf_node *root2 = uf_find(node2); + + if (root1 == root2) + return; + + if (root1->rank < root2->rank) { + root1->parent = root2; + } else if (root1->rank > root2->rank) { + root2->parent = root1; + } else { + root2->parent = root1; + root1->rank++; + } +} diff --git a/lib/usercopy.c b/lib/usercopy.c index d29fe29c6849..7b17b83c8042 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,46 +1,29 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/export.h> #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> -#include <linux/uaccess.h> +#include <linux/kernel.h> #include <linux/nospec.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/wordpart.h> /* out-of-line parts */ -#ifndef INLINE_COPY_FROM_USER +#if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long res = n; - might_fault(); - if (!should_fail_usercopy() && likely(access_ok(from, n))) { - /* - * Ensure that bad access_ok() speculation will not - * lead to nasty side effects *after* the copy is - * finished: - */ - barrier_nospec(); - instrument_copy_from_user_before(to, from, n); - res = raw_copy_from_user(to, from, n); - instrument_copy_from_user_after(to, from, n, res); - } - if (unlikely(res)) - memset(to + (n - res), 0, res); - return res; + return _inline_copy_from_user(to, from, n); } EXPORT_SYMBOL(_copy_from_user); #endif -#ifndef INLINE_COPY_TO_USER +#if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { - might_fault(); - if (should_fail_usercopy()) - return n; - if (likely(access_ok(to, n))) { - instrument_copy_to_user(to, from, n); - n = raw_copy_to_user(to, from, n); - } - return n; + return _inline_copy_to_user(to, from, n); } EXPORT_SYMBOL(_copy_to_user); 
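The union-find helpers added in lib/union_find.c above (uf_find() with path compression, uf_union() with union by rank) give near-constant amortised set-merge and root-lookup operations. A minimal, self-contained sketch of how that API is meant to be used follows; it mirrors the kernel logic in plain userspace C so it can be compiled and run on its own, and the node array, the manual parent/rank initialisation and the main() harness are illustrative assumptions for the demo rather than anything provided by the patch.

/*
 * Standalone sketch (not part of the patch): mirrors the uf_find()/uf_union()
 * logic from lib/union_find.c so the behaviour can be tried outside the
 * kernel.  In-kernel users would include <linux/union_find.h> instead.
 */
#include <stdio.h>

struct uf_node {
	struct uf_node *parent;
	unsigned int rank;
};

/* Same algorithm as the kernel's uf_find(): walk to the root while
 * shortening the path by linking each visited node to its grandparent. */
static struct uf_node *uf_find(struct uf_node *node)
{
	struct uf_node *parent;

	while (node->parent != node) {
		parent = node->parent;
		node->parent = parent->parent;
		node = parent;
	}
	return node;
}

/* Same algorithm as the kernel's uf_union(): attach the shallower tree
 * under the deeper one, bumping the rank only on ties. */
static void uf_union(struct uf_node *a, struct uf_node *b)
{
	struct uf_node *ra = uf_find(a), *rb = uf_find(b);

	if (ra == rb)
		return;
	if (ra->rank < rb->rank) {
		ra->parent = rb;
	} else if (ra->rank > rb->rank) {
		rb->parent = ra;
	} else {
		rb->parent = ra;
		ra->rank++;
	}
}

int main(void)
{
	struct uf_node n[4];

	/* Each node starts out as its own singleton set. */
	for (int i = 0; i < 4; i++) {
		n[i].parent = &n[i];
		n[i].rank = 0;
	}

	uf_union(&n[0], &n[1]);
	uf_union(&n[2], &n[3]);

	printf("0~1: %d\n", uf_find(&n[0]) == uf_find(&n[1]));	/* prints 1 */
	printf("1~2: %d\n", uf_find(&n[1]) == uf_find(&n[2]));	/* prints 0 */

	uf_union(&n[1], &n[3]);
	printf("0~3: %d\n", uf_find(&n[0]) == uf_find(&n[3]));	/* prints 1 */
	return 0;
}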
#endif diff --git a/lib/usercopy_kunit.c b/lib/usercopy_kunit.c new file mode 100644 index 000000000000..77fa00a13df7 --- /dev/null +++ b/lib/usercopy_kunit.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Kernel module for testing copy_to/from_user infrastructure. + * + * Copyright 2013 Google Inc. All Rights Reserved + * + * Authors: + * Kees Cook <keescook@chromium.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <kunit/test.h> + +/* + * Several 32-bit architectures support 64-bit {get,put}_user() calls. + * As there doesn't appear to be anything that can safely determine + * their capability at compile-time, we just have to opt-out certain archs. + */ +#if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \ + !defined(CONFIG_M68K) && \ + !defined(CONFIG_MICROBLAZE) && \ + !defined(CONFIG_NIOS2) && \ + !defined(CONFIG_PPC32) && \ + !defined(CONFIG_SUPERH)) +# define TEST_U64 +#endif + +struct usercopy_test_priv { + char *kmem; + char __user *umem; + size_t size; +}; + +static bool is_zeroed(void *from, size_t size) +{ + return memchr_inv(from, 0x0, size) == NULL; +} + +/* Test usage of check_nonzero_user(). */ +static void usercopy_test_check_nonzero_user(struct kunit *test) +{ + size_t start, end, i, zero_start, zero_end; + struct usercopy_test_priv *priv = test->priv; + char __user *umem = priv->umem; + char *kmem = priv->kmem; + size_t size = priv->size; + + KUNIT_ASSERT_GE_MSG(test, size, 2 * PAGE_SIZE, "buffer too small"); + + /* + * We want to cross a page boundary to exercise the code more + * effectively. We also don't want to make the size we scan too large, + * otherwise the test can take a long time and cause soft lockups. So + * scan a 1024 byte region across the page boundary. + */ + size = 1024; + start = PAGE_SIZE - (size / 2); + + kmem += start; + umem += start; + + zero_start = size / 4; + zero_end = size - zero_start; + + /* + * We conduct a series of check_nonzero_user() tests on a block of + * memory with the following byte-pattern (trying every possible + * [start,end] pair): + * + * [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ] + * + * And we verify that check_nonzero_user() acts identically to + * memchr_inv(). + */ + + memset(kmem, 0x0, size); + for (i = 1; i < zero_start; i += 2) + kmem[i] = 0xff; + for (i = zero_end; i < size; i += 2) + kmem[i] = 0xff; + + KUNIT_EXPECT_EQ_MSG(test, copy_to_user(umem, kmem, size), 0, + "legitimate copy_to_user failed"); + + for (start = 0; start <= size; start++) { + for (end = start; end <= size; end++) { + size_t len = end - start; + int retval = check_zeroed_user(umem + start, len); + int expected = is_zeroed(kmem + start, len); + + KUNIT_ASSERT_EQ_MSG(test, retval, expected, + "check_nonzero_user(=%d) != memchr_inv(=%d) mismatch (start=%zu, end=%zu)", + retval, expected, start, end); + } + } +} + +/* Test usage of copy_struct_from_user(). 
*/ +static void usercopy_test_copy_struct_from_user(struct kunit *test) +{ + char *umem_src = NULL, *expected = NULL; + struct usercopy_test_priv *priv = test->priv; + char __user *umem = priv->umem; + char *kmem = priv->kmem; + size_t size = priv->size; + size_t ksize, usize; + + umem_src = kunit_kmalloc(test, size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, umem_src); + + expected = kunit_kmalloc(test, size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, expected); + + /* Fill umem with a fixed byte pattern. */ + memset(umem_src, 0x3e, size); + KUNIT_ASSERT_EQ_MSG(test, copy_to_user(umem, umem_src, size), 0, + "legitimate copy_to_user failed"); + + /* Check basic case -- (usize == ksize). */ + ksize = size; + usize = size; + + memcpy(expected, umem_src, ksize); + + memset(kmem, 0x0, size); + KUNIT_EXPECT_EQ_MSG(test, copy_struct_from_user(kmem, ksize, umem, usize), 0, + "copy_struct_from_user(usize == ksize) failed"); + KUNIT_EXPECT_MEMEQ_MSG(test, kmem, expected, ksize, + "copy_struct_from_user(usize == ksize) gives unexpected copy"); + + /* Old userspace case -- (usize < ksize). */ + ksize = size; + usize = size / 2; + + memcpy(expected, umem_src, usize); + memset(expected + usize, 0x0, ksize - usize); + + memset(kmem, 0x0, size); + KUNIT_EXPECT_EQ_MSG(test, copy_struct_from_user(kmem, ksize, umem, usize), 0, + "copy_struct_from_user(usize < ksize) failed"); + KUNIT_EXPECT_MEMEQ_MSG(test, kmem, expected, ksize, + "copy_struct_from_user(usize < ksize) gives unexpected copy"); + + /* New userspace (-E2BIG) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memset(kmem, 0x0, size); + KUNIT_EXPECT_EQ_MSG(test, copy_struct_from_user(kmem, ksize, umem, usize), -E2BIG, + "copy_struct_from_user(usize > ksize) didn't give E2BIG"); + + /* New userspace (success) case -- (usize > ksize). */ + ksize = size / 2; + usize = size; + + memcpy(expected, umem_src, ksize); + KUNIT_EXPECT_EQ_MSG(test, clear_user(umem + ksize, usize - ksize), 0, + "legitimate clear_user failed"); + + memset(kmem, 0x0, size); + KUNIT_EXPECT_EQ_MSG(test, copy_struct_from_user(kmem, ksize, umem, usize), 0, + "copy_struct_from_user(usize > ksize) failed"); + KUNIT_EXPECT_MEMEQ_MSG(test, kmem, expected, ksize, + "copy_struct_from_user(usize > ksize) gives unexpected copy"); +} + +/* + * Legitimate usage: none of these copies should fail. 
+ */ +static void usercopy_test_valid(struct kunit *test) +{ + struct usercopy_test_priv *priv = test->priv; + char __user *usermem = priv->umem; + char *kmem = priv->kmem; + + memset(kmem, 0x3a, PAGE_SIZE * 2); + KUNIT_EXPECT_EQ_MSG(test, 0, copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + memset(kmem, 0x0, PAGE_SIZE); + KUNIT_EXPECT_EQ_MSG(test, 0, copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + KUNIT_EXPECT_MEMEQ_MSG(test, kmem, kmem + PAGE_SIZE, PAGE_SIZE, + "legitimate usercopy failed to copy data"); + +#define test_legit(size, check) \ + do { \ + size val_##size = (check); \ + KUNIT_EXPECT_EQ_MSG(test, 0, \ + put_user(val_##size, (size __user *)usermem), \ + "legitimate put_user (" #size ") failed"); \ + val_##size = 0; \ + KUNIT_EXPECT_EQ_MSG(test, 0, \ + get_user(val_##size, (size __user *)usermem), \ + "legitimate get_user (" #size ") failed"); \ + KUNIT_EXPECT_EQ_MSG(test, val_##size, check, \ + "legitimate get_user (" #size ") failed to do copy"); \ + } while (0) + + test_legit(u8, 0x5a); + test_legit(u16, 0x5a5b); + test_legit(u32, 0x5a5b5c5d); +#ifdef TEST_U64 + test_legit(u64, 0x5a5b5c5d6a6b6c6d); +#endif +#undef test_legit +} + +/* + * Invalid usage: none of these copies should succeed. + */ +static void usercopy_test_invalid(struct kunit *test) +{ + struct usercopy_test_priv *priv = test->priv; + char __user *usermem = priv->umem; + char *bad_usermem = (char *)usermem; + char *kmem = priv->kmem; + u64 *kmem_u64 = (u64 *)kmem; + + if (IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) || + !IS_ENABLED(CONFIG_MMU)) { + kunit_skip(test, "Testing for kernel/userspace address confusion is only sensible on architectures with a shared address space"); + return; + } + + /* Prepare kernel memory with check values. */ + memset(kmem, 0x5a, PAGE_SIZE); + memset(kmem + PAGE_SIZE, 0, PAGE_SIZE); + + /* Reject kernel-to-kernel copies through copy_from_user(). */ + KUNIT_EXPECT_NE_MSG(test, copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), 0, + "illegal all-kernel copy_from_user passed"); + + /* Destination half of buffer should have been zeroed. */ + KUNIT_EXPECT_MEMEQ_MSG(test, kmem + PAGE_SIZE, kmem, PAGE_SIZE, + "zeroing failure for illegal all-kernel copy_from_user"); + +#if 0 + /* + * When running with SMAP/PAN/etc, this will Oops the kernel + * due to the zeroing of userspace memory on failure. This needs + * to be tested in LKDTM instead, since this test module does not + * expect to explode. 
+ */ + KUNIT_EXPECT_NE_MSG(test, copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), 0, + "illegal reversed copy_from_user passed"); +#endif + KUNIT_EXPECT_NE_MSG(test, copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), 0, + "illegal all-kernel copy_to_user passed"); + + KUNIT_EXPECT_NE_MSG(test, copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), 0, + "illegal reversed copy_to_user passed"); + +#define test_illegal(size, check) \ + do { \ + size val_##size = (check); \ + /* get_user() */ \ + KUNIT_EXPECT_NE_MSG(test, get_user(val_##size, (size __user *)kmem), 0, \ + "illegal get_user (" #size ") passed"); \ + KUNIT_EXPECT_EQ_MSG(test, val_##size, 0, \ + "zeroing failure for illegal get_user (" #size ")"); \ + /* put_user() */ \ + *kmem_u64 = 0xF09FA4AFF09FA4AF; \ + KUNIT_EXPECT_NE_MSG(test, put_user(val_##size, (size __user *)kmem), 0, \ + "illegal put_user (" #size ") passed"); \ + KUNIT_EXPECT_EQ_MSG(test, *kmem_u64, 0xF09FA4AFF09FA4AF, \ + "illegal put_user (" #size ") wrote to kernel memory!"); \ + } while (0) + + test_illegal(u8, 0x5a); + test_illegal(u16, 0x5a5b); + test_illegal(u32, 0x5a5b5c5d); +#ifdef TEST_U64 + test_illegal(u64, 0x5a5b5c5d6a6b6c6d); +#endif +#undef test_illegal +} + +static int usercopy_test_init(struct kunit *test) +{ + struct usercopy_test_priv *priv; + unsigned long user_addr; + + if (!IS_ENABLED(CONFIG_MMU)) { + kunit_skip(test, "Userspace allocation testing not available on non-MMU systems"); + return 0; + } + + priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); + test->priv = priv; + priv->size = PAGE_SIZE * 2; + + priv->kmem = kunit_kmalloc(test, priv->size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->kmem); + + user_addr = kunit_vm_mmap(test, NULL, 0, priv->size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + KUNIT_ASSERT_NE_MSG(test, user_addr, 0, + "Could not create userspace mm"); + KUNIT_ASSERT_LT_MSG(test, user_addr, (unsigned long)TASK_SIZE, + "Failed to allocate user memory"); + priv->umem = (char __user *)user_addr; + + return 0; +} + +static struct kunit_case usercopy_test_cases[] = { + KUNIT_CASE(usercopy_test_valid), + KUNIT_CASE(usercopy_test_invalid), + KUNIT_CASE(usercopy_test_check_nonzero_user), + KUNIT_CASE(usercopy_test_copy_struct_from_user), + {} +}; + +static struct kunit_suite usercopy_test_suite = { + .name = "usercopy", + .init = usercopy_test_init, + .test_cases = usercopy_test_cases, +}; + +kunit_test_suites(&usercopy_test_suite); +MODULE_AUTHOR("Kees Cook <kees@kernel.org>"); +MODULE_DESCRIPTION("Kernel module for testing copy_to/from_user infrastructure"); +MODULE_LICENSE("GPL"); diff --git a/lib/util_macros_kunit.c b/lib/util_macros_kunit.c new file mode 100644 index 000000000000..94cc9f0de50a --- /dev/null +++ b/lib/util_macros_kunit.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/util_macros.h> + +#define FIND_CLOSEST_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest(i, array, ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest(struct kunit *ctx) +{ + /* This will test a few arrays that are found in drivers */ + static const int ina226_avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 }; + static const unsigned int ad7616_oversampling_avail[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + }; + static u32 wd_timeout_table[] = { 2, 4, 6, 8, 16, 32, 48, 64 }; + static int array_prog1a[] = { 1, 2, 3, 4, 5 }; + static u32 array_prog1b[] = { 2, 3, 4, 5, 6 }; + static int array_prog1mix[] = { -2, -1, 0, 1, 2 }; + static int array_prog2a[] = { 1, 3, 5, 7 }; + static u32 array_prog2b[] = { 2, 4, 6, 8 }; + static int array_prog3a[] = { 1, 4, 7, 10 }; + static u32 array_prog3b[] = { 2, 5, 8, 11 }; + static int array_prog4a[] = { 1, 5, 9, 13 }; + static u32 array_prog4b[] = { 2, 6, 10, 14 }; + + FIND_CLOSEST_RANGE_CHECK(-3, 2, ina226_avg_tab, 0); + FIND_CLOSEST_RANGE_CHECK(3, 10, ina226_avg_tab, 1); + FIND_CLOSEST_RANGE_CHECK(11, 40, ina226_avg_tab, 2); + FIND_CLOSEST_RANGE_CHECK(41, 96, ina226_avg_tab, 3); + FIND_CLOSEST_RANGE_CHECK(97, 192, ina226_avg_tab, 4); + FIND_CLOSEST_RANGE_CHECK(193, 384, ina226_avg_tab, 5); + FIND_CLOSEST_RANGE_CHECK(385, 768, ina226_avg_tab, 6); + FIND_CLOSEST_RANGE_CHECK(769, 2048, ina226_avg_tab, 7); + + /* The array that found the bug that caused this kunit to exist */ + FIND_CLOSEST_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 0); + FIND_CLOSEST_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 1); + FIND_CLOSEST_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 2); + FIND_CLOSEST_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 4); + FIND_CLOSEST_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 5); + FIND_CLOSEST_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 6); + FIND_CLOSEST_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 7); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, wd_timeout_table, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, wd_timeout_table, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, wd_timeout_table, 2); + FIND_CLOSEST_RANGE_CHECK(8, 12, wd_timeout_table, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, wd_timeout_table, 4); + FIND_CLOSEST_RANGE_CHECK(25, 40, wd_timeout_table, 5); + FIND_CLOSEST_RANGE_CHECK(41, 56, wd_timeout_table, 6); + FIND_CLOSEST_RANGE_CHECK(57, 128, wd_timeout_table, 7); + + /* One could argue that find_closest() should not be used for monotonic + * arrays (like 1,2,3,4,5), but even so, it should work as long as the + * array is sorted ascending. 
*/ + FIND_CLOSEST_RANGE_CHECK(-3, 1, array_prog1a, 0); + FIND_CLOSEST_RANGE_CHECK(2, 2, array_prog1a, 1); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1a, 3); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog1a, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog1b, 0); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1b, 1); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_RANGE_CHECK(5, 5, array_prog1b, 3); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog1b, 4); + + FIND_CLOSEST_RANGE_CHECK(-4, -2, array_prog1mix, 0); + FIND_CLOSEST_RANGE_CHECK(-1, -1, array_prog1mix, 1); + FIND_CLOSEST_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_RANGE_CHECK(1, 1, array_prog1mix, 3); + FIND_CLOSEST_RANGE_CHECK(2, 5, array_prog1mix, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog2a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 4, array_prog2a, 1); + FIND_CLOSEST_RANGE_CHECK(5, 6, array_prog2a, 2); + FIND_CLOSEST_RANGE_CHECK(7, 10, array_prog2a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog2b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, array_prog2b, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, array_prog2b, 2); + FIND_CLOSEST_RANGE_CHECK(8, 10, array_prog2b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog3a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 5, array_prog3a, 1); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog3a, 2); + FIND_CLOSEST_RANGE_CHECK(9, 20, array_prog3a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog3b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 6, array_prog3b, 1); + FIND_CLOSEST_RANGE_CHECK(7, 9, array_prog3b, 2); + FIND_CLOSEST_RANGE_CHECK(10, 20, array_prog3b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog4a, 0); + FIND_CLOSEST_RANGE_CHECK(4, 7, array_prog4a, 1); + FIND_CLOSEST_RANGE_CHECK(8, 11, array_prog4a, 2); + FIND_CLOSEST_RANGE_CHECK(12, 20, array_prog4a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 4, array_prog4b, 0); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog4b, 1); + FIND_CLOSEST_RANGE_CHECK(9, 12, array_prog4b, 2); + FIND_CLOSEST_RANGE_CHECK(13, 20, array_prog4b, 3); +} + +#define FIND_CLOSEST_DESC_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest_descending(i, array, \ + ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest_descending(struct kunit *ctx) +{ + /* Same arrays as 'test_find_closest' but reversed */ + static const int ina226_avg_tab[] = { 1024, 512, 256, 128, 64, 16, 4, 1 }; + static const unsigned int ad7616_oversampling_avail[] = { + 128, 64, 32, 16, 8, 4, 2, 1 + }; + static u32 wd_timeout_table[] = { 64, 48, 32, 16, 8, 6, 4, 2 }; + static int array_prog1a[] = { 5, 4, 3, 2, 1 }; + static u32 array_prog1b[] = { 6, 5, 4, 3, 2 }; + static int array_prog1mix[] = { 2, 1, 0, -1, -2 }; + static int array_prog2a[] = { 7, 5, 3, 1 }; + static u32 array_prog2b[] = { 8, 6, 4, 2 }; + static int array_prog3a[] = { 10, 7, 4, 1 }; + static u32 array_prog3b[] = { 11, 8, 5, 2 }; + static int array_prog4a[] = { 13, 9, 5, 1 }; + static u32 array_prog4b[] = { 14, 10, 6, 2 }; + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, ina226_avg_tab, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 10, ina226_avg_tab, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(11, 40, ina226_avg_tab, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 96, ina226_avg_tab, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 192, ina226_avg_tab, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(193, 384, ina226_avg_tab, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(385, 768, ina226_avg_tab, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(769, 2048, 
ina226_avg_tab, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, wd_timeout_table, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, wd_timeout_table, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, wd_timeout_table, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 12, wd_timeout_table, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, wd_timeout_table, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 40, wd_timeout_table, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 56, wd_timeout_table, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(57, 128, wd_timeout_table, 0); + + /* One could argue that find_closest_descending() should not be used + * for monotonic arrays (like 5,4,3,2,1), but even so, it should still + * work as long as the array is sorted descending. */ + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, array_prog1a, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 2, array_prog1a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog1a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog1b, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 5, array_prog1b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog1b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-4, -2, array_prog1mix, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(-1, -1, array_prog1mix, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(1, 1, array_prog1mix, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 5, array_prog1mix, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog2a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 4, array_prog2a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 6, array_prog2a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 10, array_prog2a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog2b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, array_prog2b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, array_prog2b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 10, array_prog2b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog3a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 5, array_prog3a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog3a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 20, array_prog3a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog3b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, array_prog3b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 9, array_prog3b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(10, 20, array_prog3b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog4a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 7, array_prog4a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 11, array_prog4a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(12, 20, array_prog4a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 4, array_prog4b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog4b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 12, array_prog4b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 20, array_prog4b, 0); +} + +static struct kunit_case 
__refdata util_macros_test_cases[] = { + KUNIT_CASE(test_find_closest), + KUNIT_CASE(test_find_closest_descending), + {} +}; + +static struct kunit_suite util_macros_test_suite = { + .name = "util_macros.h", + .test_cases = util_macros_test_cases, +}; + +kunit_test_suites(&util_macros_test_suite); + +MODULE_AUTHOR("Alexandru Ardelean <aardelean@baylibre.com>"); +MODULE_DESCRIPTION("Test cases for util_macros.h helpers"); +MODULE_LICENSE("GPL"); diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index c46c2300517c..82fe827af542 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -38,3 +38,8 @@ config GENERIC_VDSO_OVERFLOW_PROTECT in the hotpath. endif + +config VDSO_GETRANDOM + bool + help + Selected by architectures that support vDSO getrandom(). diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index 9f031eafc465..cedbf15f8087 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -4,6 +4,7 @@ GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH)) c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c) +c-getrandom-$(CONFIG_VDSO_GETRANDOM) := $(addprefix $(GENERIC_VDSO_DIR), getrandom.c) # This cmd checks that the vdso library does not contain dynamic relocations. # It has to be called after the linking of the vdso library and requires it diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c new file mode 100644 index 000000000000..938ca539aaa6 --- /dev/null +++ b/lib/vdso/getrandom.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <linux/array_size.h> +#include <linux/minmax.h> +#include <vdso/datapage.h> +#include <vdso/getrandom.h> +#include <vdso/unaligned.h> +#include <asm/vdso/getrandom.h> +#include <uapi/linux/mman.h> +#include <uapi/linux/random.h> + +#undef PAGE_SIZE +#undef PAGE_MASK +#define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do { \ + while (len >= sizeof(type)) { \ + __put_unaligned_t(type, __get_unaligned_t(type, src), dst); \ + __put_unaligned_t(type, 0, src); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } \ +} while (0) + +static void memcpy_and_zero_src(void *dst, void *src, size_t len) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + if (IS_ENABLED(CONFIG_64BIT)) + MEMCPY_AND_ZERO_SRC(u64, dst, src, len); + MEMCPY_AND_ZERO_SRC(u32, dst, src, len); + MEMCPY_AND_ZERO_SRC(u16, dst, src, len); + } + MEMCPY_AND_ZERO_SRC(u8, dst, src, len); +} + +/** + * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall. + * @rng_info: Describes state of kernel RNG, memory shared with kernel. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * @opaque_state: Pointer to an opaque state area. + * @opaque_len: Length of opaque state area. + * + * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's + * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same + * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always + * calls into the syscall. 
+ * + * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated + * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0, + * this function should not be used. + * + * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields + * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external + * locking is used, one state must be allocated per thread, as it is not safe to call this function + * concurrently with the same @opaque_state. However, it is safe to call this using the same + * @opaque_state that is shared between main code and signal handling code, within the same thread. + * + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t +__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len, + unsigned int flags, void *opaque_state, size_t opaque_len) +{ + ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len); + struct vgetrandom_state *state = opaque_state; + size_t batch_len, nblocks, orig_len = len; + bool in_use, have_retried = false; + void *orig_buffer = buffer; + u64 current_generation; + u32 counter[2] = { 0 }; + + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) { + struct vgetrandom_opaque_params *params = opaque_state; + params->size_of_opaque_state = sizeof(*state); + params->mmap_prot = PROT_READ | PROT_WRITE; + params->mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS; + for (size_t i = 0; i < ARRAY_SIZE(params->reserved); ++i) + params->reserved[i] = 0; + return 0; + } + + /* The state must not straddle a page, since pages can be zeroed at any time. */ + if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE)) + return -EFAULT; + + /* Handle unexpected flags by falling back to the kernel. */ + if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))) + goto fallback_syscall; + + /* If the caller passes the wrong size, which might happen due to CRIU, fallback. */ + if (unlikely(opaque_len != sizeof(*state))) + goto fallback_syscall; + + /* + * If the kernel's RNG is not yet ready, then it's not possible to provide random bytes from + * userspace, because A) the various @flags require this to block, or not, depending on + * various factors unavailable to userspace, and B) the kernel's behavior before the RNG is + * ready is to reseed from the entropy pool at every invocation. + */ + if (unlikely(!READ_ONCE(rng_info->is_ready))) + goto fallback_syscall; + + /* + * This condition is checked after @rng_info->is_ready, because before the kernel's RNG is + * initialized, the @flags parameter may require this to block or return an error, even when + * len is zero. + */ + if (unlikely(!len)) + return 0; + + /* + * @state->in_use is basic reentrancy protection against this running in a signal handler + * with the same @opaque_state, but obviously not atomic wrt multiple CPUs or more than one + * level of reentrancy. If a signal interrupts this after reading @state->in_use, but before + * writing @state->in_use, there is still no race, because the signal handler will run to + * its completion before returning execution. + */ + in_use = READ_ONCE(state->in_use); + if (unlikely(in_use)) + /* The syscall simply fills the buffer and does not touch @state, so fallback. 
*/ + goto fallback_syscall; + WRITE_ONCE(state->in_use, true); + +retry_generation: + /* + * @rng_info->generation must always be read here, as it serializes @state->key with the + * kernel's RNG reseeding schedule. + */ + current_generation = READ_ONCE(rng_info->generation); + + /* + * If @state->generation doesn't match the kernel RNG's generation, then it means the + * kernel's RNG has reseeded, and so @state->key is reseeded as well. + */ + if (unlikely(state->generation != current_generation)) { + /* + * Write the generation before filling the key, in case of fork. If there is a fork + * just after this line, the parent and child will get different random bytes from + * the syscall, which is good. However, were this line to occur after the getrandom + * syscall, then both child and parent could have the same bytes and the same + * generation counter, so the fork would not be detected. Therefore, write + * @state->generation before the call to the getrandom syscall. + */ + WRITE_ONCE(state->generation, current_generation); + + /* + * Prevent the syscall from being reordered wrt current_generation. Pairs with the + * smp_store_release(&_vdso_rng_data.generation) in random.c. + */ + smp_rmb(); + + /* Reseed @state->key using fresh bytes from the kernel. */ + if (getrandom_syscall(state->key, sizeof(state->key), 0) != sizeof(state->key)) { + /* + * If the syscall failed to refresh the key, then @state->key is now + * invalid, so invalidate the generation so that it is not used again, and + * fallback to using the syscall entirely. + */ + WRITE_ONCE(state->generation, 0); + + /* + * Set @state->in_use to false only after the last write to @state in the + * line above. + */ + WRITE_ONCE(state->in_use, false); + + goto fallback_syscall; + } + + /* + * Set @state->pos to beyond the end of the batch, so that the batch is refilled + * using the new key. + */ + state->pos = sizeof(state->batch); + } + + /* Set len to the total amount of bytes that this function is allowed to read, ret. */ + len = ret; +more_batch: + /* + * First use bytes out of @state->batch, which may have been filled by the last call to this + * function. + */ + batch_len = min_t(size_t, sizeof(state->batch) - state->pos, len); + if (batch_len) { + /* Zeroing at the same time as memcpying helps preserve forward secrecy. */ + memcpy_and_zero_src(buffer, state->batch + state->pos, batch_len); + state->pos += batch_len; + buffer += batch_len; + len -= batch_len; + } + + if (!len) { + /* Prevent the loop from being reordered wrt ->generation. */ + barrier(); + + /* + * Since @rng_info->generation will never be 0, re-read @state->generation, rather + * than using the local current_generation variable, to learn whether a fork + * occurred or if @state was zeroed due to memory pressure. Primarily, though, this + * indicates whether the kernel's RNG has reseeded, in which case generate a new key + * and start over. + */ + if (unlikely(READ_ONCE(state->generation) != READ_ONCE(rng_info->generation))) { + /* + * Prevent this from looping forever in case of low memory or racing with a + * user force-reseeding the kernel's RNG using the ioctl. + */ + if (have_retried) { + WRITE_ONCE(state->in_use, false); + goto fallback_syscall; + } + + have_retried = true; + buffer = orig_buffer; + goto retry_generation; + } + + /* + * Set @state->in_use to false only when there will be no more reads or writes of + * @state. 
+ */ + WRITE_ONCE(state->in_use, false); + return ret; + } + + /* Generate blocks of RNG output directly into @buffer while there's enough room left. */ + nblocks = len / CHACHA_BLOCK_SIZE; + if (nblocks) { + __arch_chacha20_blocks_nostack(buffer, state->key, counter, nblocks); + buffer += nblocks * CHACHA_BLOCK_SIZE; + len -= nblocks * CHACHA_BLOCK_SIZE; + } + + BUILD_BUG_ON(sizeof(state->batch_key) % CHACHA_BLOCK_SIZE != 0); + + /* Refill the batch and overwrite the key, in order to preserve forward secrecy. */ + __arch_chacha20_blocks_nostack(state->batch_key, state->key, counter, + sizeof(state->batch_key) / CHACHA_BLOCK_SIZE); + + /* Since the batch was just refilled, set the position back to 0 to indicate a full batch. */ + state->pos = 0; + goto more_batch; + +fallback_syscall: + return getrandom_syscall(orig_buffer, orig_len, flags); +} + +static __always_inline ssize_t +__cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom_data(__arch_get_vdso_rng_data(), buffer, len, flags, opaque_state, opaque_len); +} diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 899850bd6f0b..c01eaafd8041 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -140,14 +140,14 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, do { /* - * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace - * enabled tasks have a special VVAR page installed which - * has vd->seq set to 1 and vd->clock_mode set to - * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks - * this does not affect performance because if vd->seq is - * odd, i.e. a concurrent update is in progress the extra - * check for vd->clock_mode is just a few extra - * instructions while spin waiting for vd->seq to become + * Open coded function vdso_read_begin() to handle + * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a + * special VVAR page installed which has vd->seq set to 1 and + * vd->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time + * namespace affected tasks this does not affect performance + * because if vd->seq is odd, i.e. a concurrent update is in + * progress the extra check for vd->clock_mode is just a few + * extra instructions while spin waiting for vd->seq to become * even again. */ while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { @@ -223,8 +223,8 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, do { /* - * Open coded to handle VDSO_CLOCK_TIMENS. See comment in - * do_hres(). + * Open coded function vdso_read_begin() to handle + * VDSO_CLOCK_TIMENS. See comment in do_hres(). 
*/ while ((seq = READ_ONCE(vd->seq)) & 1) { if (IS_ENABLED(CONFIG_TIME_NS) && diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 552738f14275..56fe96319292 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -51,7 +51,7 @@ #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/byteorder.h> /* cpu_to_le16 */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/string_helpers.h> #include "kstrtox.h" @@ -160,8 +160,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base) } EXPORT_SYMBOL(simple_strtoll); -static noinline_for_stack -int skip_atoi(const char **s) +static inline int skip_atoi(const char **s) { int i = 0; @@ -407,7 +406,7 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) return len + width; } -#define SIGN 1 /* unsigned/signed, must be 1 */ +#define SIGN 1 /* unsigned/signed */ #define LEFT 2 /* left justified */ #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ @@ -415,38 +414,26 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ -static_assert(SIGN == 1); static_assert(ZEROPAD == ('0' - ' ')); static_assert(SMALL == ('a' ^ 'A')); -enum format_type { - FORMAT_TYPE_NONE, /* Just a string part */ - FORMAT_TYPE_WIDTH, - FORMAT_TYPE_PRECISION, - FORMAT_TYPE_CHAR, - FORMAT_TYPE_STR, - FORMAT_TYPE_PTR, - FORMAT_TYPE_PERCENT_CHAR, - FORMAT_TYPE_INVALID, - FORMAT_TYPE_LONG_LONG, - FORMAT_TYPE_ULONG, - FORMAT_TYPE_LONG, - FORMAT_TYPE_UBYTE, - FORMAT_TYPE_BYTE, - FORMAT_TYPE_USHORT, - FORMAT_TYPE_SHORT, - FORMAT_TYPE_UINT, - FORMAT_TYPE_INT, - FORMAT_TYPE_SIZE_T, - FORMAT_TYPE_PTRDIFF +enum format_state { + FORMAT_STATE_NONE, /* Just a string part */ + FORMAT_STATE_NUM, + FORMAT_STATE_WIDTH, + FORMAT_STATE_PRECISION, + FORMAT_STATE_CHAR, + FORMAT_STATE_STR, + FORMAT_STATE_PTR, + FORMAT_STATE_PERCENT_CHAR, + FORMAT_STATE_INVALID, }; struct printf_spec { - unsigned int type:8; /* format_type enum */ - signed int field_width:24; /* width of output field */ - unsigned int flags:8; /* flags to number() */ - unsigned int base:8; /* number base, 8, 10 or 16 only */ - signed int precision:16; /* # of digits/chars */ + unsigned char flags; /* flags to number() */ + unsigned char base; /* number base, 8, 10 or 16 only */ + short precision; /* # of digits/chars */ + int field_width; /* width of output field */ } __packed; static_assert(sizeof(struct printf_spec) == 8); @@ -579,7 +566,6 @@ char *special_hex_number(char *buf, char *end, unsigned long long num, int size) { struct printf_spec spec; - spec.type = FORMAT_TYPE_PTR; spec.field_width = 2 + 2 * size; /* 0x + hex */ spec.flags = SPECIAL | SMALL | ZEROPAD; spec.base = 16; @@ -966,13 +952,13 @@ char *bdev_name(char *buf, char *end, struct block_device *bdev, hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); - if (bdev->bd_partno) { + if (bdev_is_partition(bdev)) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) { if (buf < end) *buf = 'p'; buf++; } - buf = number(buf, end, bdev->bd_partno, spec); + buf = number(buf, end, bdev_partno(bdev), spec); } return buf; } @@ -1040,6 +1026,20 @@ static const struct printf_spec default_dec04_spec = { }; static noinline_for_stack +char *hex_range(char *buf, char *end, u64 start_val, u64 end_val, + struct printf_spec spec) +{ + buf = number(buf, end, start_val, spec); + if (start_val == end_val) + return buf; + + if (buf < end) + *buf = '-'; + 
++buf; + return number(buf, end, end_val, spec); +} + +static noinline_for_stack char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec, const char *fmt) { @@ -1080,7 +1080,7 @@ char *resource_string(char *buf, char *end, struct resource *res, #define FLAG_BUF_SIZE (2 * sizeof(res->flags)) #define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") #define RAW_BUF_SIZE sizeof("[mem - flags 0x]") - char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + char sym[MAX(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; char *p = sym, *pend = sym + sizeof(sym); @@ -1115,11 +1115,7 @@ char *resource_string(char *buf, char *end, struct resource *res, p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); - } + p = hex_range(p, pend, res->start, res->end, *specp); } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1141,6 +1137,31 @@ char *resource_string(char *buf, char *end, struct resource *res, } static noinline_for_stack +char *range_string(char *buf, char *end, const struct range *range, + struct printf_spec spec, const char *fmt) +{ + char sym[sizeof("[range 0x0123456789abcdef-0x0123456789abcdef]")]; + char *p = sym, *pend = sym + sizeof(sym); + + struct printf_spec range_spec = { + .field_width = 2 + 2 * sizeof(range->start), /* 0x + 2 * 8 */ + .flags = SPECIAL | SMALL | ZEROPAD, + .base = 16, + .precision = -1, + }; + + if (check_pointer(&buf, end, range, spec)) + return buf; + + p = string_nocheck(p, pend, "[range ", default_str_spec); + p = hex_range(p, pend, range->start, range->end, range_spec); + *p++ = ']'; + *p = '\0'; + + return string_nocheck(buf, end, sym, spec); +} + +static noinline_for_stack char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { @@ -2054,25 +2075,6 @@ char *format_page_flags(char *buf, char *end, unsigned long flags) return buf; } -static -char *format_page_type(char *buf, char *end, unsigned int page_type) -{ - buf = number(buf, end, page_type, default_flag_spec); - - if (buf < end) - *buf = '('; - buf++; - - if (page_type_has_type(page_type)) - buf = format_flags(buf, end, ~page_type, pagetype_names); - - if (buf < end) - *buf = ')'; - buf++; - - return buf; -} - static noinline_for_stack char *flags_string(char *buf, char *end, void *flags_ptr, struct printf_spec spec, const char *fmt) @@ -2086,8 +2088,6 @@ char *flags_string(char *buf, char *end, void *flags_ptr, switch (fmt[1]) { case 'p': return format_page_flags(buf, end, *(unsigned long *)flags_ptr); - case 't': - return format_page_type(buf, end, *(unsigned int *)flags_ptr); case 'v': flags = *(unsigned long *)flags_ptr; names = vmaflag_names; @@ -2250,6 +2250,15 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +static noinline_for_stack +char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) +{ + if (*fmt == 'r' && fmt[1] == 'a') + return range_string(buf, end, ptr, spec, fmt); + return resource_string(buf, end, ptr, spec, fmt); +} + int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2298,6 +2307,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); * - 'Bb' as above with module build ID (for use in backtraces) * - 'R' For decoded struct resource, 
e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'ra' For struct ranges, e.g., [range 0x0000000000000000 - 0x00000000000000ff] * - 'b[l]' For a bitmap, the number of bits is determined by the field * width which must be explicitly specified either as part of the * format string '%32b[l]' or through '%*b[l]', [l] selects @@ -2422,7 +2432,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return symbol_string(buf, end, ptr, spec, fmt); case 'R': case 'r': - return resource_string(buf, end, ptr, spec, fmt); + return resource_or_range(fmt, buf, end, ptr, spec); case 'h': return hex_string(buf, end, ptr, spec, fmt); case 'b': @@ -2506,6 +2516,26 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } +struct fmt { + const char *str; + unsigned char state; // enum format_state + unsigned char size; // size of numbers +}; + +#define SPEC_CHAR(x, flag) [(x)-32] = flag +static unsigned char spec_flag(unsigned char c) +{ + static const unsigned char spec_flag_array[] = { + SPEC_CHAR(' ', SPACE), + SPEC_CHAR('#', SPECIAL), + SPEC_CHAR('+', PLUS), + SPEC_CHAR('-', LEFT), + SPEC_CHAR('0', ZEROPAD), + }; + c -= 32; + return (c < sizeof(spec_flag_array)) ? spec_flag_array[c] : 0; +} + /* * Helper function to decode printf style format. * Each call decode a token from the format and return the @@ -2528,181 +2558,141 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @qualifier: qualifier of a number (long, size_t, ...) */ static noinline_for_stack -int format_decode(const char *fmt, struct printf_spec *spec) +struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) { - const char *start = fmt; - char qualifier; + const char *start = fmt.str; + char flag; /* we finished early by reading the field width */ - if (spec->type == FORMAT_TYPE_WIDTH) { + if (unlikely(fmt.state == FORMAT_STATE_WIDTH)) { if (spec->field_width < 0) { spec->field_width = -spec->field_width; spec->flags |= LEFT; } - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; goto precision; } /* we finished early by reading the precision */ - if (spec->type == FORMAT_TYPE_PRECISION) { + if (unlikely(fmt.state == FORMAT_STATE_PRECISION)) { if (spec->precision < 0) spec->precision = 0; - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; goto qualifier; } /* By default */ - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; - for (; *fmt ; ++fmt) { - if (*fmt == '%') + for (; *fmt.str ; fmt.str++) { + if (*fmt.str == '%') break; } /* Return the current non-format string */ - if (fmt != start || !*fmt) - return fmt - start; + if (fmt.str != start || !*fmt.str) + return fmt; - /* Process flags */ + /* Process flags. 
This also skips the first '%' */ spec->flags = 0; - - while (1) { /* this also skips first '%' */ - bool found = true; - - ++fmt; - - switch (*fmt) { - case '-': spec->flags |= LEFT; break; - case '+': spec->flags |= PLUS; break; - case ' ': spec->flags |= SPACE; break; - case '#': spec->flags |= SPECIAL; break; - case '0': spec->flags |= ZEROPAD; break; - default: found = false; - } - - if (!found) - break; - } + do { + /* this also skips first '%' */ + flag = spec_flag(*++fmt.str); + spec->flags |= flag; + } while (flag); /* get field width */ spec->field_width = -1; - if (isdigit(*fmt)) - spec->field_width = skip_atoi(&fmt); - else if (*fmt == '*') { + if (isdigit(*fmt.str)) + spec->field_width = skip_atoi(&fmt.str); + else if (unlikely(*fmt.str == '*')) { /* it's the next argument */ - spec->type = FORMAT_TYPE_WIDTH; - return ++fmt - start; + fmt.state = FORMAT_STATE_WIDTH; + fmt.str++; + return fmt; } precision: /* get the precision */ spec->precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) { - spec->precision = skip_atoi(&fmt); + if (unlikely(*fmt.str == '.')) { + fmt.str++; + if (isdigit(*fmt.str)) { + spec->precision = skip_atoi(&fmt.str); if (spec->precision < 0) spec->precision = 0; - } else if (*fmt == '*') { + } else if (*fmt.str == '*') { /* it's the next argument */ - spec->type = FORMAT_TYPE_PRECISION; - return ++fmt - start; + fmt.state = FORMAT_STATE_PRECISION; + fmt.str++; + return fmt; } } qualifier: - /* get the conversion qualifier */ - qualifier = 0; - if (*fmt == 'h' || _tolower(*fmt) == 'l' || - *fmt == 'z' || *fmt == 't') { - qualifier = *fmt++; - if (unlikely(qualifier == *fmt)) { - if (qualifier == 'l') { - qualifier = 'L'; - ++fmt; - } else if (qualifier == 'h') { - qualifier = 'H'; - ++fmt; - } - } - } - - /* default base */ + /* Set up default numeric format */ spec->base = 10; - switch (*fmt) { - case 'c': - spec->type = FORMAT_TYPE_CHAR; - return ++fmt - start; - - case 's': - spec->type = FORMAT_TYPE_STR; - return ++fmt - start; - - case 'p': - spec->type = FORMAT_TYPE_PTR; - return ++fmt - start; - - case '%': - spec->type = FORMAT_TYPE_PERCENT_CHAR; - return ++fmt - start; - - /* integer number formats - set up the flags and "break" */ - case 'o': - spec->base = 8; - break; + fmt.state = FORMAT_STATE_NUM; + fmt.size = sizeof(int); + static const struct format_state { + unsigned char state; + unsigned char size; + unsigned char flags_or_double_size; + unsigned char base; + } lookup_state[256] = { + // Length + ['l'] = { 0, sizeof(long), sizeof(long long) }, + ['L'] = { 0, sizeof(long long) }, + ['h'] = { 0, sizeof(short), sizeof(char) }, + ['H'] = { 0, sizeof(char) }, // Questionable historical + ['z'] = { 0, sizeof(size_t) }, + ['t'] = { 0, sizeof(ptrdiff_t) }, + + // Non-numeric formats + ['c'] = { FORMAT_STATE_CHAR }, + ['s'] = { FORMAT_STATE_STR }, + ['p'] = { FORMAT_STATE_PTR }, + ['%'] = { FORMAT_STATE_PERCENT_CHAR }, + + // Numerics + ['o'] = { FORMAT_STATE_NUM, 0, 0, 8 }, + ['x'] = { FORMAT_STATE_NUM, 0, SMALL, 16 }, + ['X'] = { FORMAT_STATE_NUM, 0, 0, 16 }, + ['d'] = { FORMAT_STATE_NUM, 0, SIGN, 10 }, + ['i'] = { FORMAT_STATE_NUM, 0, SIGN, 10 }, + ['u'] = { FORMAT_STATE_NUM, 0, 0, 10, }, - case 'x': - spec->flags |= SMALL; - fallthrough; - - case 'X': - spec->base = 16; - break; - - case 'd': - case 'i': - spec->flags |= SIGN; - break; - case 'u': - break; - - case 'n': /* * Since %n poses a greater security risk than * utility, treat it as any other invalid or * unsupported format specifier. 
*/ - fallthrough; + }; - default: - WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); - spec->type = FORMAT_TYPE_INVALID; - return fmt - start; - } - - if (qualifier == 'L') - spec->type = FORMAT_TYPE_LONG_LONG; - else if (qualifier == 'l') { - BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); - spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); - } else if (qualifier == 'z') { - spec->type = FORMAT_TYPE_SIZE_T; - } else if (qualifier == 't') { - spec->type = FORMAT_TYPE_PTRDIFF; - } else if (qualifier == 'H') { - BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); - spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); - } else if (qualifier == 'h') { - BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); - spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); - } else { - BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT); - spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN); + const struct format_state *p = lookup_state + (u8)*fmt.str; + if (p->size) { + fmt.size = p->size; + if (p->flags_or_double_size && fmt.str[0] == fmt.str[1]) { + fmt.size = p->flags_or_double_size; + fmt.str++; + } + fmt.str++; + p = lookup_state + *fmt.str; + } + if (p->state) { + if (p->base) + spec->base = p->base; + spec->flags |= p->flags_or_double_size; + fmt.state = p->state; + fmt.str++; + return fmt; } - return ++fmt - start; + WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt.str); + fmt.state = FORMAT_STATE_INVALID; + return fmt; } static void @@ -2723,11 +2713,27 @@ set_precision(struct printf_spec *spec, int prec) } } +/* + * Turn a 1/2/4-byte value into a 64-bit one for printing: truncate + * as necessary and deal with signedness. + * + * 'size' is the size of the value in bytes. + */ +static unsigned long long convert_num_spec(unsigned int val, int size, struct printf_spec spec) +{ + unsigned int shift = 32 - size*8; + + val <<= shift; + if (!(spec.flags & SIGN)) + return val >> shift; + return (int)val >> shift; +} + /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space - * @fmt: The format string to use + * @fmt_str: The format string to use * @args: Arguments for the format string * * This function generally follows C99 vsnprintf, but has some @@ -2751,11 +2757,14 @@ set_precision(struct printf_spec *spec, int prec) * * If you're not already dealing with a va_list consider using snprintf(). */ -int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) { - unsigned long long num; char *str, *end; struct printf_spec spec = {0}; + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. 
*/ @@ -2771,33 +2780,43 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) size = end - buf; } - while (*fmt) { - const char *old_fmt = fmt; - int read = format_decode(fmt, &spec); + while (*fmt.str) { + const char *old_fmt = fmt.str; - fmt += read; + fmt = format_decode(fmt, &spec); - switch (spec.type) { - case FORMAT_TYPE_NONE: { - int copy = read; + switch (fmt.state) { + case FORMAT_STATE_NONE: { + int read = fmt.str - old_fmt; if (str < end) { + int copy = read; if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); } str += read; - break; + continue; + } + + case FORMAT_STATE_NUM: { + unsigned long long num; + if (fmt.size <= sizeof(int)) + num = convert_num_spec(va_arg(args, int), fmt.size, spec); + else + num = va_arg(args, long long); + str = number(str, end, num, spec); + continue; } - case FORMAT_TYPE_WIDTH: + case FORMAT_STATE_WIDTH: set_field_width(&spec, va_arg(args, int)); - break; + continue; - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_PRECISION: set_precision(&spec, va_arg(args, int)); - break; + continue; - case FORMAT_TYPE_CHAR: { + case FORMAT_STATE_CHAR: { char c; if (!(spec.flags & LEFT)) { @@ -2817,27 +2836,27 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) *str = ' '; ++str; } - break; + continue; } - case FORMAT_TYPE_STR: + case FORMAT_STATE_STR: str = string(str, end, va_arg(args, char *), spec); - break; + continue; - case FORMAT_TYPE_PTR: - str = pointer(fmt, str, end, va_arg(args, void *), + case FORMAT_STATE_PTR: + str = pointer(fmt.str, str, end, va_arg(args, void *), spec); - while (isalnum(*fmt)) - fmt++; - break; + while (isalnum(*fmt.str)) + fmt.str++; + continue; - case FORMAT_TYPE_PERCENT_CHAR: + case FORMAT_STATE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; - break; + continue; - case FORMAT_TYPE_INVALID: + default: /* * Presumably the arguments passed gcc's type * checking, but there is no safe or sane way @@ -2847,47 +2866,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) * sync. */ goto out; - - default: - switch (spec.type) { - case FORMAT_TYPE_LONG_LONG: - num = va_arg(args, long long); - break; - case FORMAT_TYPE_ULONG: - num = va_arg(args, unsigned long); - break; - case FORMAT_TYPE_LONG: - num = va_arg(args, long); - break; - case FORMAT_TYPE_SIZE_T: - if (spec.flags & SIGN) - num = va_arg(args, ssize_t); - else - num = va_arg(args, size_t); - break; - case FORMAT_TYPE_PTRDIFF: - num = va_arg(args, ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - num = (unsigned char) va_arg(args, int); - break; - case FORMAT_TYPE_BYTE: - num = (signed char) va_arg(args, int); - break; - case FORMAT_TYPE_USHORT: - num = (unsigned short) va_arg(args, int); - break; - case FORMAT_TYPE_SHORT: - num = (short) va_arg(args, int); - break; - case FORMAT_TYPE_INT: - num = (int) va_arg(args, int); - break; - default: - num = va_arg(args, unsigned int); - } - - str = number(str, end, num, spec); } } @@ -3043,7 +3021,7 @@ EXPORT_SYMBOL(sprintf); * vbin_printf - Parse a format string and place args' binary value in a buffer * @bin_buf: The buffer to place args' binary value * @size: The size of the buffer(by words(32bits), not characters) - * @fmt: The format string to use + * @fmt_str: The format string to use * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument @@ -3056,8 +3034,12 @@ EXPORT_SYMBOL(sprintf); * If the return value is greater than @size, the resulting bin_buf is NOT * valid for bstr_printf(). 
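An aside on the numeric path above: convert_num_spec() replaces the old per-type switch in vsnprintf(). Integer arguments no wider than int are fetched as the promoted int and then truncated and, for signed conversions, sign-extended according to fmt.size; anything wider is read directly as long long. A minimal user-space sketch of the same shift trick (the helper name and sample values are illustrative, not part of the patch):

#include <stdio.h>

/* Same arithmetic as convert_num_spec(), signed case only. */
static long long narrow_signed(unsigned int val, int size)
{
	unsigned int shift = 32 - size * 8;

	val <<= shift;			/* drop the high-order garbage bits */
	return (int)val >> shift;	/* arithmetic shift sign-extends */
}

int main(void)
{
	printf("%lld\n", narrow_signed(0xffffff85u, 1));	/* %hhd -> -123   */
	printf("%lld\n", narrow_signed(0xffff8000u, 2));	/* %hd  -> -32768 */
	printf("%lld\n", narrow_signed(0x00001234u, 4));	/* %d   -> 4660   */
	return 0;
}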
*/ -int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt_str, va_list args) { + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; struct printf_spec spec = {0}; char *str, *end; int width; @@ -3089,31 +3071,29 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) value; \ }) - while (*fmt) { - int read = format_decode(fmt, &spec); + while (*fmt.str) { + fmt = format_decode(fmt, &spec); - fmt += read; - - switch (spec.type) { - case FORMAT_TYPE_NONE: - case FORMAT_TYPE_PERCENT_CHAR: + switch (fmt.state) { + case FORMAT_STATE_NONE: + case FORMAT_STATE_PERCENT_CHAR: break; - case FORMAT_TYPE_INVALID: + case FORMAT_STATE_INVALID: goto out; - case FORMAT_TYPE_WIDTH: - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_WIDTH: + case FORMAT_STATE_PRECISION: width = (int)save_arg(int); /* Pointers may require the width */ - if (*fmt == 'p') + if (*fmt.str == 'p') set_field_width(&spec, width); break; - case FORMAT_TYPE_CHAR: + case FORMAT_STATE_CHAR: save_arg(char); break; - case FORMAT_TYPE_STR: { + case FORMAT_STATE_STR: { const char *save_str = va_arg(args, char *); const char *err_msg; size_t len; @@ -3129,9 +3109,9 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) break; } - case FORMAT_TYPE_PTR: + case FORMAT_STATE_PTR: /* Dereferenced pointers must be done now */ - switch (*fmt) { + switch (*fmt.str) { /* Dereference of functions is still OK */ case 'S': case 's': @@ -3141,11 +3121,11 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) save_arg(void *); break; default: - if (!isalnum(*fmt)) { + if (!isalnum(*fmt.str)) { save_arg(void *); break; } - str = pointer(fmt, str, end, va_arg(args, void *), + str = pointer(fmt.str, str, end, va_arg(args, void *), spec); if (str + 1 < end) *str++ = '\0'; @@ -3153,35 +3133,14 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) end[-1] = '\0'; /* Must be nul terminated */ } /* skip all alphanumeric pointer suffixes */ - while (isalnum(*fmt)) - fmt++; + while (isalnum(*fmt.str)) + fmt.str++; break; - default: - switch (spec.type) { - - case FORMAT_TYPE_LONG_LONG: + case FORMAT_STATE_NUM: + if (fmt.size > sizeof(int)) { save_arg(long long); - break; - case FORMAT_TYPE_ULONG: - case FORMAT_TYPE_LONG: - save_arg(unsigned long); - break; - case FORMAT_TYPE_SIZE_T: - save_arg(size_t); - break; - case FORMAT_TYPE_PTRDIFF: - save_arg(ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - case FORMAT_TYPE_BYTE: - save_arg(char); - break; - case FORMAT_TYPE_USHORT: - case FORMAT_TYPE_SHORT: - save_arg(short); - break; - default: + } else { save_arg(int); } } @@ -3197,7 +3156,7 @@ EXPORT_SYMBOL_GPL(vbin_printf); * bstr_printf - Format a string from binary arguments and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space - * @fmt: The format string to use + * @fmt_str: The format string to use * @bin_buf: Binary arguments for the format string * * This function like C99 vsnprintf, but the difference is that vsnprintf gets @@ -3215,8 +3174,12 @@ EXPORT_SYMBOL_GPL(vbin_printf); * return is greater than or equal to @size, the resulting * string is truncated. 
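For orientation, vbin_printf() and bstr_printf() form a pair: the first serializes the arguments into a u32 buffer (copying %s strings into it), the second replays that buffer through the same format string, as the bstr_printf() hunk below shows. Since the exported bprintf() wrapper is removed further down in this diff, a caller wanting the varargs form would wrap vbin_printf() itself. A hedged sketch with hypothetical helper names:

#include <linux/kernel.h>
#include <linux/stdarg.h>
#include <linux/string.h>

static int record_fmt(u32 *bin, size_t words, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = vbin_printf(bin, words, fmt, args);	/* args -> binary blob */
	va_end(args);
	return ret;
}

static void replay_fmt(void)
{
	u32 bin[32];
	char out[64];

	record_fmt(bin, ARRAY_SIZE(bin), "pid %d comm %s", 42, "kthreadd");
	/* The *same* format string decodes the blob back into text. */
	bstr_printf(out, sizeof(out), "pid %d comm %s", bin);
}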
*/ -int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +int bstr_printf(char *buf, size_t size, const char *fmt_str, const u32 *bin_buf) { + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; struct printf_spec spec = {0}; char *str, *end; const char *args = (const char *)bin_buf; @@ -3248,33 +3211,33 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) size = end - buf; } - while (*fmt) { - const char *old_fmt = fmt; - int read = format_decode(fmt, &spec); - - fmt += read; + while (*fmt.str) { + const char *old_fmt = fmt.str; + unsigned long long num; - switch (spec.type) { - case FORMAT_TYPE_NONE: { - int copy = read; + fmt = format_decode(fmt, &spec); + switch (fmt.state) { + case FORMAT_STATE_NONE: { + int read = fmt.str - old_fmt; if (str < end) { + int copy = read; if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); } str += read; - break; + continue; } - case FORMAT_TYPE_WIDTH: + case FORMAT_STATE_WIDTH: set_field_width(&spec, get_arg(int)); - break; + continue; - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_PRECISION: set_precision(&spec, get_arg(int)); - break; + continue; - case FORMAT_TYPE_CHAR: { + case FORMAT_STATE_CHAR: { char c; if (!(spec.flags & LEFT)) { @@ -3293,21 +3256,21 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) *str = ' '; ++str; } - break; + continue; } - case FORMAT_TYPE_STR: { + case FORMAT_STATE_STR: { const char *str_arg = args; args += strlen(str_arg) + 1; str = string(str, end, (char *)str_arg, spec); - break; + continue; } - case FORMAT_TYPE_PTR: { + case FORMAT_STATE_PTR: { bool process = false; int copy, len; /* Non function dereferences were already done */ - switch (*fmt) { + switch (*fmt.str) { case 'S': case 's': case 'x': @@ -3316,7 +3279,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) process = true; break; default: - if (!isalnum(*fmt)) { + if (!isalnum(*fmt.str)) { process = true; break; } @@ -3331,63 +3294,32 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } } if (process) - str = pointer(fmt, str, end, get_arg(void *), spec); + str = pointer(fmt.str, str, end, get_arg(void *), spec); - while (isalnum(*fmt)) - fmt++; - break; + while (isalnum(*fmt.str)) + fmt.str++; + continue; } - case FORMAT_TYPE_PERCENT_CHAR: + case FORMAT_STATE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; - break; + continue; - case FORMAT_TYPE_INVALID: + case FORMAT_STATE_INVALID: goto out; - default: { - unsigned long long num; - - switch (spec.type) { - - case FORMAT_TYPE_LONG_LONG: + case FORMAT_STATE_NUM: + if (fmt.size > sizeof(int)) { num = get_arg(long long); - break; - case FORMAT_TYPE_ULONG: - case FORMAT_TYPE_LONG: - num = get_arg(unsigned long); - break; - case FORMAT_TYPE_SIZE_T: - num = get_arg(size_t); - break; - case FORMAT_TYPE_PTRDIFF: - num = get_arg(ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - num = get_arg(unsigned char); - break; - case FORMAT_TYPE_BYTE: - num = get_arg(signed char); - break; - case FORMAT_TYPE_USHORT: - num = get_arg(unsigned short); - break; - case FORMAT_TYPE_SHORT: - num = get_arg(short); - break; - case FORMAT_TYPE_UINT: - num = get_arg(unsigned int); - break; - default: - num = get_arg(int); + } else { + num = convert_num_spec(get_arg(int), fmt.size, spec); } - str = number(str, end, num, spec); - } /* default: */ - } /* switch(spec.type) */ - } /* while(*fmt) */ + continue; + } + } /* while(*fmt.str) */ out: if (size > 0) { @@ 
-3404,29 +3336,6 @@ out: } EXPORT_SYMBOL_GPL(bstr_printf); -/** - * bprintf - Parse a format string and place args' binary value in a buffer - * @bin_buf: The buffer to place args' binary value - * @size: The size of the buffer(by words(32bits), not characters) - * @fmt: The format string to use - * @...: Arguments for the format string - * - * The function returns the number of words(u32) written - * into @bin_buf. - */ -int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) -{ - va_list args; - int ret; - - va_start(args, fmt); - ret = vbin_printf(bin_buf, size, fmt, args); - va_end(args); - - return ret; -} -EXPORT_SYMBOL_GPL(bprintf); - #endif /* CONFIG_BINARY_PRINTF */ /** diff --git a/lib/win_minmax.c b/lib/win_minmax.c index ec10506834b6..1682e614309c 100644 --- a/lib/win_minmax.c +++ b/lib/win_minmax.c @@ -97,3 +97,4 @@ u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) return minmax_subwin_update(m, win, &val); } +EXPORT_SYMBOL(minmax_running_min); diff --git a/lib/xarray.c b/lib/xarray.c index 5e7d6334d70d..116e9286c64e 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -125,19 +125,20 @@ static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) */ static void xas_squash_marks(const struct xa_state *xas) { - unsigned int mark = 0; + xa_mark_t mark = 0; unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; - if (!xas->xa_sibs) - return; + for (;;) { + unsigned long *marks = node_marks(xas->xa_node, mark); - do { - unsigned long *marks = xas->xa_node->marks[mark]; - if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) - continue; - __set_bit(xas->xa_offset, marks); - bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); - } while (mark++ != (__force unsigned)XA_MARK_MAX); + if (find_next_bit(marks, limit, xas->xa_offset + 1) != limit) { + __set_bit(xas->xa_offset, marks); + bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); + } + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } } /* extracts the offset within this node from the index */ @@ -200,7 +201,8 @@ static void *xas_start(struct xa_state *xas) return entry; } -static void *xas_descend(struct xa_state *xas, struct xa_node *node) +static __always_inline void *xas_descend(struct xa_state *xas, + struct xa_node *node) { unsigned int offset = get_offset(xas->xa_index, node); void *entry = xa_entry(xas->xa, node, offset); @@ -434,6 +436,11 @@ static unsigned long max_index(void *entry) return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; } +static inline void *xa_zero_to_null(void *entry) +{ + return xa_is_zero(entry) ? 
NULL : entry; +} + static void xas_shrink(struct xa_state *xas) { struct xarray *xa = xas->xa; @@ -450,8 +457,8 @@ static void xas_shrink(struct xa_state *xas) break; if (!xa_is_node(entry) && node->shift) break; - if (xa_is_zero(entry) && xa_zero_busy(xa)) - entry = NULL; + if (xa_zero_busy(xa)) + entry = xa_zero_to_null(entry); xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); @@ -1021,7 +1028,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ - if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) + if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT <= order)) goto nomem; if (xas->xa_shift + XA_CHUNK_SHIFT > order) return; @@ -1146,6 +1153,7 @@ void xas_pause(struct xa_state *xas) if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) break; } + xas->xa_index &= ~0UL << node->shift; xas->xa_index += (offset - xas->xa_offset) << node->shift; if (xas->xa_index == 0) xas->xa_node = XAS_BOUNDS; @@ -1381,6 +1389,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK)) continue; + if (xa_is_sibling(entry)) + continue; if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); @@ -1473,9 +1483,7 @@ void *xa_load(struct xarray *xa, unsigned long index) rcu_read_lock(); do { - entry = xas_load(&xas); - if (xa_is_zero(entry)) - entry = NULL; + entry = xa_zero_to_null(xas_load(&xas)); } while (xas_retry(&xas, entry)); rcu_read_unlock(); @@ -1485,8 +1493,6 @@ EXPORT_SYMBOL(xa_load); static void *xas_result(struct xa_state *xas, void *curr) { - if (xa_is_zero(curr)) - return NULL; if (xas_error(xas)) curr = xas->xa_node; return curr; @@ -1507,7 +1513,7 @@ static void *xas_result(struct xa_state *xas, void *curr) void *__xa_erase(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); - return xas_result(&xas, xas_store(&xas, NULL)); + return xas_result(&xas, xa_zero_to_null(xas_store(&xas, NULL))); } EXPORT_SYMBOL(__xa_erase); @@ -1566,7 +1572,7 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); - return xas_result(&xas, curr); + return xas_result(&xas, xa_zero_to_null(curr)); } EXPORT_SYMBOL(__xa_store); @@ -1599,6 +1605,9 @@ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) } EXPORT_SYMBOL(xa_store); +static inline void *__xa_cmpxchg_raw(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp); + /** * __xa_cmpxchg() - Store this entry in the XArray. * @xa: XArray. @@ -1618,6 +1627,13 @@ EXPORT_SYMBOL(xa_store); void *__xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { + return xa_zero_to_null(__xa_cmpxchg_raw(xa, index, old, entry, gfp)); +} +EXPORT_SYMBOL(__xa_cmpxchg); + +static inline void *__xa_cmpxchg_raw(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ XA_STATE(xas, xa, index); void *curr; @@ -1635,7 +1651,6 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, return xas_result(&xas, curr); } -EXPORT_SYMBOL(__xa_cmpxchg); /** * __xa_insert() - Store this entry in the XArray if no entry is present. 
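A note on the zero-entry handling consolidated above: a reserved slot stores the internal XA_ZERO_ENTRY, and the public lookup and cmpxchg paths now funnel through xa_zero_to_null() so callers only ever see NULL. The _raw cmpxchg variant is what lets __xa_insert() (next hunk) still tell a reserved slot apart from an empty one and report -EBUSY. A hedged caller-side sketch of the resulting semantics (the function below is illustrative, not part of the patch):

#include <linux/xarray.h>

static void reserved_slot_demo(struct xarray *xa)
{
	/* Reserving stores XA_ZERO_ENTRY internally. */
	if (xa_reserve(xa, 5, GFP_KERNEL))
		return;

	/* Lookups convert the zero entry back to NULL... */
	WARN_ON(xa_load(xa, 5) != NULL);

	/* ...yet the slot still counts as occupied for an insert. */
	WARN_ON(xa_insert(xa, 5, xa_mk_value(1), GFP_KERNEL) != -EBUSY);

	xa_release(xa, 5);	/* drop the reservation again */
}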
@@ -1655,26 +1670,16 @@ EXPORT_SYMBOL(__xa_cmpxchg); */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { - XA_STATE(xas, xa, index); void *curr; + int errno; - if (WARN_ON_ONCE(xa_is_advanced(entry))) - return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; - - do { - curr = xas_load(&xas); - if (!curr) { - xas_store(&xas, entry); - if (xa_track_free(xa)) - xas_clear_mark(&xas, XA_FREE_MARK); - } else { - xas_set_err(&xas, -EBUSY); - } - } while (__xas_nomem(&xas, gfp)); - - return xas_error(&xas); + curr = __xa_cmpxchg_raw(xa, index, NULL, entry, gfp); + errno = xa_err(curr); + if (errno) + return errno; + return (curr != NULL) ? -EBUSY : 0; } EXPORT_SYMBOL(__xa_insert); @@ -1765,39 +1770,52 @@ unlock: EXPORT_SYMBOL(xa_store_range); /** - * xa_get_order() - Get the order of an entry. - * @xa: XArray. - * @index: Index of the entry. + * xas_get_order() - Get the order of an entry. + * @xas: XArray operation state. + * + * Called after xas_load, the xas should not be in an error state. * * Return: A number between 0 and 63 indicating the order of the entry. */ -int xa_get_order(struct xarray *xa, unsigned long index) +int xas_get_order(struct xa_state *xas) { - XA_STATE(xas, xa, index); - void *entry; int order = 0; - rcu_read_lock(); - entry = xas_load(&xas); - - if (!entry) - goto unlock; - - if (!xas.xa_node) - goto unlock; + if (!xas->xa_node) + return 0; for (;;) { - unsigned int slot = xas.xa_offset + (1 << order); + unsigned int slot = xas->xa_offset + (1 << order); if (slot >= XA_CHUNK_SIZE) break; - if (!xa_is_sibling(xas.xa_node->slots[slot])) + if (!xa_is_sibling(xa_entry(xas->xa, xas->xa_node, slot))) break; order++; } - order += xas.xa_node->shift; -unlock: + order += xas->xa_node->shift; + return order; +} +EXPORT_SYMBOL_GPL(xas_get_order); + +/** + * xa_get_order() - Get the order of an entry. + * @xa: XArray. + * @index: Index of the entry. + * + * Return: A number between 0 and 63 indicating the order of the entry. + */ +int xa_get_order(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + int order = 0; + void *entry; + + rcu_read_lock(); + entry = xas_load(&xas); + if (entry) + order = xas_get_order(&xas); rcu_read_unlock(); return order; diff --git a/lib/xxhash.c b/lib/xxhash.c index d5bb9ff10607..b5bd567aa6b3 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -38,7 +38,7 @@ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/errno.h> #include <linux/compiler.h> #include <linux/kernel.h> diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index aef086a6bf2f..20aa459bfb3e 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -5,7 +5,8 @@ config XZ_DEC help LZMA2 compression algorithm and BCJ filters are supported using the .xz file format as the container. For integrity checking, - CRC32 is supported. See Documentation/staging/xz.rst for more information. + CRC32 is supported. See Documentation/staging/xz.rst for more + information. 
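Back in the lib/xarray.c hunks above, xas_get_order() is split out of xa_get_order() so a caller that has already walked the tree (under RCU or the xa_lock) can query the order of the entry its xa_state points at without a second lookup. A hedged sketch, assuming CONFIG_XARRAY_MULTI and eliding error handling (the function is illustrative only):

#include <linux/xarray.h>

static void order_demo(struct xarray *xa, void *item)
{
	XA_STATE_ORDER(xas, xa, 0x30, 4);	/* one entry spanning 0x30-0x3f */

	do {
		xas_lock(&xas);
		xas_store(&xas, item);
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));

	rcu_read_lock();
	xas_set(&xas, 0x37);			/* any index inside the range */
	if (xas_load(&xas))
		WARN_ON(xas_get_order(&xas) != 4);
	rcu_read_unlock();
}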
if XZ_DEC @@ -29,11 +30,21 @@ config XZ_DEC_ARMTHUMB default y select XZ_DEC_BCJ +config XZ_DEC_ARM64 + bool "ARM64 BCJ filter decoder" if EXPERT + default y + select XZ_DEC_BCJ + config XZ_DEC_SPARC bool "SPARC BCJ filter decoder" if EXPERT default y select XZ_DEC_BCJ +config XZ_DEC_RISCV + bool "RISC-V BCJ filter decoder" if EXPERT + default y + select XZ_DEC_BCJ + config XZ_DEC_MICROLZMA bool "MicroLZMA decoder" default n diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 88a2c35e1b59..6a7906a328ba 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * CRC32 using the polynomial from IEEE-802.3 * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ /* @@ -27,9 +26,9 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; -XZ_EXTERN void xz_crc32_init(void) +void xz_crc32_init(void) { - const uint32_t poly = CRC32_POLY_LE; + const uint32_t poly = 0xEDB88320; uint32_t i; uint32_t j; @@ -46,7 +45,7 @@ XZ_EXTERN void xz_crc32_init(void) return; } -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) { crc = ~crc; diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index ef449e97d1a1..8237db17eee3 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include "xz_private.h" @@ -24,7 +23,9 @@ struct xz_dec_bcj { BCJ_IA64 = 6, /* Big or little endian */ BCJ_ARM = 7, /* Little endian only */ BCJ_ARMTHUMB = 8, /* Little endian only */ - BCJ_SPARC = 9 /* Big or little endian */ + BCJ_SPARC = 9, /* Big or little endian */ + BCJ_ARM64 = 10, /* AArch64 */ + BCJ_RISCV = 11 /* RV32GQC_Zfh, RV64GQC_Zfh */ } type; /* @@ -162,7 +163,9 @@ static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t instr; - for (i = 0; i + 4 <= size; i += 4) { + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr & 0xFC000003) == 0x48000001) { instr &= 0x03FFFFFC; @@ -219,7 +222,9 @@ static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) /* Instruction normalized with bit_res for easier manipulation */ uint64_t norm; - for (i = 0; i + 16 <= size; i += 16) { + size &= ~(size_t)15; + + for (i = 0; i < size; i += 16) { mask = branch_table[buf[i] & 0x1F]; for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { if (((mask >> slot) & 1) == 0) @@ -267,7 +272,9 @@ static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t addr; - for (i = 0; i + 4 <= size; i += 4) { + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { if (buf[i + 3] == 0xEB) { addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) | ((uint32_t)buf[i + 2] << 16); @@ -290,7 +297,12 @@ static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t addr; - for (i = 0; i + 4 <= size; i += 2) { + if (size < 4) + return 0; + + size -= 4; + + for (i = 0; i <= size; i += 2) { if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { addr = (((uint32_t)buf[i + 1] & 0x07) << 19) @@ -318,7 +330,9 @@ static size_t 
bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) size_t i; uint32_t instr; - for (i = 0; i + 4 <= size; i += 4) { + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { instr <<= 2; @@ -334,6 +348,140 @@ static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) } #endif +#ifdef XZ_DEC_ARM64 +static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + uint32_t addr; + + size &= ~(size_t)3; + + for (i = 0; i < size; i += 4) { + instr = get_unaligned_le32(buf + i); + + if ((instr >> 26) == 0x25) { + /* BL instruction */ + addr = instr - ((s->pos + (uint32_t)i) >> 2); + instr = 0x94000000 | (addr & 0x03FFFFFF); + put_unaligned_le32(instr, buf + i); + + } else if ((instr & 0x9F000000) == 0x90000000) { + /* ADRP instruction */ + addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC); + + /* Only convert values in the range +/-512 MiB. */ + if ((addr + 0x020000) & 0x1C0000) + continue; + + addr -= (s->pos + (uint32_t)i) >> 12; + + instr &= 0x9000001F; + instr |= (addr & 3) << 29; + instr |= (addr & 0x03FFFC) << 3; + instr |= (0U - (addr & 0x020000)) & 0xE00000; + + put_unaligned_le32(instr, buf + i); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_RISCV +static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t b1; + uint32_t b2; + uint32_t b3; + uint32_t instr; + uint32_t instr2; + uint32_t instr2_rs1; + uint32_t addr; + + if (size < 8) + return 0; + + size -= 8; + + for (i = 0; i <= size; i += 2) { + instr = buf[i]; + + if (instr == 0xEF) { + /* JAL */ + b1 = buf[i + 1]; + if ((b1 & 0x0D) != 0) + continue; + + b2 = buf[i + 2]; + b3 = buf[i + 3]; + + addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1); + addr -= s->pos + (uint32_t)i; + + buf[i + 1] = (uint8_t)((b1 & 0x0F) + | ((addr >> 8) & 0xF0)); + + buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) + | ((addr >> 7) & 0x10) + | ((addr << 4) & 0xE0)); + + buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) + | ((addr >> 13) & 0x80)); + + i += 4 - 2; + + } else if ((instr & 0x7F) == 0x17) { + /* AUIPC */ + instr |= (uint32_t)buf[i + 1] << 8; + instr |= (uint32_t)buf[i + 2] << 16; + instr |= (uint32_t)buf[i + 3] << 24; + + if (instr & 0xE80) { + /* AUIPC's rd doesn't equal x0 or x2. */ + instr2 = get_unaligned_le32(buf + i + 4); + + if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) { + i += 6 - 2; + continue; + } + + addr = (instr & 0xFFFFF000) + (instr2 >> 20); + + instr = 0x17 | (2 << 7) | (instr2 << 12); + instr2 = addr; + } else { + /* AUIPC's rd equals x0 or x2. */ + instr2_rs1 = instr >> 27; + + if ((uint32_t)((instr - 0x3117) << 18) + >= (instr2_rs1 & 0x1D)) { + i += 4 - 2; + continue; + } + + addr = get_unaligned_be32(buf + i + 4); + addr -= s->pos + (uint32_t)i; + + instr2 = (instr >> 12) | (addr << 20); + + instr = 0x17 | (instr2_rs1 << 7) + | ((addr + 0x800) & 0xFFFFF000); + } + + put_unaligned_le32(instr, buf + i); + put_unaligned_le32(instr2, buf + i + 4); + + i += 8 - 2; + } + } + + return i; +} +#endif + /* * Apply the selected BCJ filter. Update *pos and s->pos to match the amount * of data that got filtered. 
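The new ARM64 and RISC-V BCJ decoders above follow the same idea as the existing filters: during compression the PC-relative target of a branch is rewritten into an absolute address, so calls to the same function produce identical byte patterns regardless of call site and compress better; the decoder undoes the rewrite. A hedged user-space sketch of the round trip for a single AArch64 BL instruction (constants follow bcj_arm64() above; the sample values are made up):

#include <stdint.h>
#include <stdio.h>

static uint32_t bl_encode(uint32_t instr, uint32_t pos)	/* compress side */
{
	uint32_t addr = (instr + (pos >> 2)) & 0x03FFFFFF;

	return 0x94000000 | addr;
}

static uint32_t bl_decode(uint32_t instr, uint32_t pos)	/* decompress side */
{
	uint32_t addr = (instr - (pos >> 2)) & 0x03FFFFFF;

	return 0x94000000 | addr;
}

int main(void)
{
	uint32_t bl = 0x94000000 | 0x000123;	/* BL with a small forward offset */
	uint32_t pos = 0x40000;			/* byte position of the instruction */
	uint32_t enc = bl_encode(bl, pos);

	printf("round trip ok: %d\n", bl_decode(enc, pos) == bl);
	return 0;
}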
@@ -381,6 +529,16 @@ static void bcj_apply(struct xz_dec_bcj *s, filtered = bcj_sparc(s, buf, size); break; #endif +#ifdef XZ_DEC_ARM64 + case BCJ_ARM64: + filtered = bcj_arm64(s, buf, size); + break; +#endif +#ifdef XZ_DEC_RISCV + case BCJ_RISCV: + filtered = bcj_riscv(s, buf, size); + break; +#endif default: /* Never reached but silence compiler warnings. */ filtered = 0; @@ -414,9 +572,8 @@ static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) * data in chunks of 1-16 bytes. To hide this issue, this function does * some buffering. */ -XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, - struct xz_buf *b) +enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, + struct xz_buf *b) { size_t out_start; @@ -524,7 +681,7 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, return s->ret; } -XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) +struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) { struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s != NULL) @@ -533,7 +690,7 @@ XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) return s; } -XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) +enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) { switch (id) { #ifdef XZ_DEC_X86 @@ -554,6 +711,12 @@ XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) #ifdef XZ_DEC_SPARC case BCJ_SPARC: #endif +#ifdef XZ_DEC_ARM64 + case BCJ_ARM64: +#endif +#ifdef XZ_DEC_RISCV + case BCJ_RISCV: +#endif break; default: diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 27ce34520e78..83bb66b6016d 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: 0BSD + /* * LZMA2 decoder * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include "xz_private.h" @@ -961,8 +960,7 @@ static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) * Take care of the LZMA2 control layer, and forward the job of actual LZMA * decoding or copying of uncompressed chunks to other functions. */ -XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, - struct xz_buf *b) +enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b) { uint32_t tmp; @@ -1138,8 +1136,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, return XZ_OK; } -XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, - uint32_t dict_max) +struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max) { struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) @@ -1162,7 +1159,7 @@ XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, return s; } -XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) +enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) { /* This limits dictionary size to 3 GiB to keep parsing simpler. 
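For context on the "3 GiB" remark: the LZMA2 props byte encodes the dictionary size, and the lines computing it fall outside this hunk's context. Per the .xz/LZMA2 format the mapping is 2 or 3 (low bit) shifted left by props/2 + 11, and rejecting props > 39 caps the result at 3 GiB. A small user-space illustration of that mapping (an assumption about the elided code, stated here rather than taken from the diff):

#include <stdint.h>
#include <stdio.h>

/* props -> dictionary size, as defined by the LZMA2 filter properties. */
static uint64_t lzma2_dict_size(unsigned int props)
{
	uint64_t size = 2 + (props & 1);

	return size << ((props >> 1) + 11);
}

int main(void)
{
	unsigned int v[] = { 0, 1, 24, 39 };

	for (unsigned int i = 0; i < 4; i++)
		printf("props %2u -> %llu bytes\n", v[i],
		       (unsigned long long)lzma2_dict_size(v[i]));
	return 0;	/* 4 KiB, 6 KiB, 16 MiB, 3 GiB */
}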
*/ if (props > 39) @@ -1198,7 +1195,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) return XZ_OK; } -XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { if (DEC_IS_MULTI(s->dict.mode)) vfree(s->dict.buf); diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index 683570b93a8c..f9d003684d56 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * .xz Stream decoder * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include "xz_private.h" @@ -747,7 +746,7 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) * actually succeeds (that's the price to pay of using the output buffer as * the workspace). */ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) { size_t in_start; size_t out_start; @@ -783,7 +782,7 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) @@ -813,7 +812,7 @@ error_bcj: return NULL; } -XZ_EXTERN void xz_dec_reset(struct xz_dec *s) +void xz_dec_reset(struct xz_dec *s) { s->sequence = SEQ_STREAM_HEADER; s->allow_buf_error = false; @@ -825,7 +824,7 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s) s->temp.size = STREAM_HEADER_SIZE; } -XZ_EXTERN void xz_dec_end(struct xz_dec *s) +void xz_dec_end(struct xz_dec *s) { if (s != NULL) { xz_dec_lzma2_end(s->lzma2); diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c index 61098c67a413..f40817d65897 100644 --- a/lib/xz/xz_dec_syms.c +++ b/lib/xz/xz_dec_syms.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * XZ decoder module information * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include <linux/module.h> @@ -23,11 +22,6 @@ EXPORT_SYMBOL(xz_dec_microlzma_end); #endif MODULE_DESCRIPTION("XZ decompressor"); -MODULE_VERSION("1.1"); +MODULE_VERSION("1.2"); MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov"); - -/* - * This code is in the public domain, but in Linux it's simplest to just - * say it's GPL and consider the authors as the copyright holders. - */ -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/xz/xz_dec_test.c b/lib/xz/xz_dec_test.c index da28a19d6c98..53d3600f2ddb 100644 --- a/lib/xz/xz_dec_test.c +++ b/lib/xz/xz_dec_test.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: 0BSD + /* * XZ decoder tester * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #include <linux/kernel.h> @@ -212,9 +211,4 @@ module_exit(xz_dec_test_exit); MODULE_DESCRIPTION("XZ decompressor tester"); MODULE_VERSION("1.0"); MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org>"); - -/* - * This code is in the public domain, but in Linux it's simplest to just - * say it's GPL and consider the authors as the copyright holders. 
- */ -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h index 92d852d4f87a..d2632b7dfb9c 100644 --- a/lib/xz/xz_lzma2.h +++ b/lib/xz/xz_lzma2.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * LZMA2 definitions * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_LZMA2_H diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index bf1e94ec7873..8409784b1639 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Private includes and definitions * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_PRIVATE_H @@ -13,7 +12,7 @@ #ifdef __KERNEL__ # include <linux/xz.h> # include <linux/kernel.h> -# include <asm/unaligned.h> +# include <linux/unaligned.h> /* XZ_PREBOOT may be defined only via decompress_unxz.c. */ # ifndef XZ_PREBOOT # include <linux/slab.h> @@ -37,6 +36,12 @@ # ifdef CONFIG_XZ_DEC_SPARC # define XZ_DEC_SPARC # endif +# ifdef CONFIG_XZ_DEC_ARM64 +# define XZ_DEC_ARM64 +# endif +# ifdef CONFIG_XZ_DEC_RISCV +# define XZ_DEC_RISCV +# endif # ifdef CONFIG_XZ_DEC_MICROLZMA # define XZ_DEC_MICROLZMA # endif @@ -98,23 +103,19 @@ */ #ifndef XZ_DEC_BCJ # if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ - || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_IA64) \ || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ - || defined(XZ_DEC_SPARC) + || defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) \ + || defined(XZ_DEC_RISCV) # define XZ_DEC_BCJ # endif #endif -#ifndef CRC32_POLY_LE -#define CRC32_POLY_LE 0xedb88320 -#endif - /* * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). */ -XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, - uint32_t dict_max); +struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max); /* * Decode the LZMA2 properties (one byte) and reset the decoder. Return @@ -122,22 +123,20 @@ XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, * big enough, and XZ_OPTIONS_ERROR if props indicates something that this * decoder doesn't support. */ -XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, - uint8_t props); +enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props); /* Decode raw LZMA2 stream from b->in to b->out. */ -XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, - struct xz_buf *b); +enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b); /* Free the memory allocated for the LZMA2 decoder. */ -XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); +void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); #ifdef XZ_DEC_BCJ /* * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before * calling xz_dec_bcj_run(). */ -XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); +struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); /* * Decode the Filter ID of a BCJ filter. This implementation doesn't @@ -145,16 +144,15 @@ XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); * is needed. Returns XZ_OK if the given Filter ID is supported. * Otherwise XZ_OPTIONS_ERROR is returned. 
*/ -XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); +enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); /* * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() * must be called directly. */ -XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, - struct xz_buf *b); +enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, + struct xz_buf *b); /* Free the memory allocated for the BCJ filters. */ #define xz_dec_bcj_end(s) kfree(s) diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h index 430bb3a0d195..55f9f6f94b78 100644 --- a/lib/xz/xz_stream.h +++ b/lib/xz/xz_stream.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Definitions for handling the .xz file format * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_STREAM_H diff --git a/lib/zlib_deflate/deflate_syms.c b/lib/zlib_deflate/deflate_syms.c index 24b740b99678..68941a2350ea 100644 --- a/lib/zlib_deflate/deflate_syms.c +++ b/lib/zlib_deflate/deflate_syms.c @@ -17,4 +17,5 @@ EXPORT_SYMBOL(zlib_deflate); EXPORT_SYMBOL(zlib_deflateInit2); EXPORT_SYMBOL(zlib_deflateEnd); EXPORT_SYMBOL(zlib_deflateReset); +MODULE_DESCRIPTION("Data compression using the deflation algorithm"); MODULE_LICENSE("GPL"); diff --git a/lib/zlib_dfltcc/dfltcc.h b/lib/zlib_dfltcc/dfltcc.h index b96232bdd44d..0f2a16d7a48a 100644 --- a/lib/zlib_dfltcc/dfltcc.h +++ b/lib/zlib_dfltcc/dfltcc.h @@ -80,6 +80,7 @@ struct dfltcc_param_v0 { uint8_t csb[1152]; }; +static_assert(offsetof(struct dfltcc_param_v0, csb) == 384); static_assert(sizeof(struct dfltcc_param_v0) == 1536); #define CVT_CRC32 0 diff --git a/lib/zlib_dfltcc/dfltcc_util.h b/lib/zlib_dfltcc/dfltcc_util.h index 4a46b5009f0d..10509270d822 100644 --- a/lib/zlib_dfltcc/dfltcc_util.h +++ b/lib/zlib_dfltcc/dfltcc_util.h @@ -2,6 +2,8 @@ #ifndef DFLTCC_UTIL_H #define DFLTCC_UTIL_H +#include "dfltcc.h" +#include <linux/kmsan-checks.h> #include <linux/zutil.h> /* @@ -20,6 +22,7 @@ typedef enum { #define DFLTCC_CMPR 2 #define DFLTCC_XPND 4 #define HBT_CIRCULAR (1 << 7) +#define DFLTCC_FN_MASK ((1 << 7) - 1) #define HB_BITS 15 #define HB_SIZE (1 << HB_BITS) @@ -34,6 +37,7 @@ static inline dfltcc_cc dfltcc( ) { Byte *t2 = op1 ? *op1 : NULL; + unsigned char *orig_t2 = t2; size_t t3 = len1 ? *len1 : 0; const Byte *t4 = op2 ? *op2 : NULL; size_t t5 = len2 ? *len2 : 0; @@ -59,6 +63,30 @@ static inline dfltcc_cc dfltcc( : "cc", "memory"); t2 = r2; t3 = r3; t4 = r4; t5 = r5; + /* + * Unpoison the parameter block and the output buffer. + * This is a no-op in non-KMSAN builds. + */ + switch (fn & DFLTCC_FN_MASK) { + case DFLTCC_QAF: + kmsan_unpoison_memory(param, sizeof(struct dfltcc_qaf_param)); + break; + case DFLTCC_GDHT: + kmsan_unpoison_memory(param, offsetof(struct dfltcc_param_v0, csb)); + break; + case DFLTCC_CMPR: + kmsan_unpoison_memory(param, sizeof(struct dfltcc_param_v0)); + kmsan_unpoison_memory( + orig_t2, + t2 - orig_t2 + + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 
0 : 1)); + break; + case DFLTCC_XPND: + kmsan_unpoison_memory(param, sizeof(struct dfltcc_param_v0)); + kmsan_unpoison_memory(orig_t2, t2 - orig_t2); + break; + } + if (op1) *op1 = t2; if (len1) diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h index 1d9cc03924ca..c22a2e69bf46 100644 --- a/lib/zstd/common/mem.h +++ b/lib/zstd/common/mem.h @@ -15,7 +15,7 @@ /*-**************************************** * Dependencies ******************************************/ -#include <asm/unaligned.h> /* get_unaligned, put_unaligned* */ +#include <linux/unaligned.h> /* get_unaligned, put_unaligned* */ #include <linux/compiler.h> /* inline */ #include <linux/swab.h> /* swab32, swab64 */ #include <linux/types.h> /* size_t, ptrdiff_t */ diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c index f620cafca633..16bb995bc6c4 100644 --- a/lib/zstd/compress/zstd_compress.c +++ b/lib/zstd/compress/zstd_compress.c @@ -4810,6 +4810,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced2( dictLoadMethod, cctxParams.cParams, cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, customMem); + if (!cdict) + return NULL; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c index 04e1b5c01d9b..bd8784449b31 100644 --- a/lib/zstd/zstd_compress_module.c +++ b/lib/zstd/zstd_compress_module.c @@ -66,6 +66,12 @@ int zstd_max_clevel(void) } EXPORT_SYMBOL(zstd_max_clevel); +int zstd_default_clevel(void) +{ + return ZSTD_defaultCLevel(); +} +EXPORT_SYMBOL(zstd_default_clevel); + size_t zstd_compress_bound(size_t src_size) { return ZSTD_compressBound(src_size); @@ -79,6 +85,13 @@ zstd_parameters zstd_get_params(int level, } EXPORT_SYMBOL(zstd_get_params); +zstd_compression_parameters zstd_get_cparams(int level, + unsigned long long estimated_src_size, size_t dict_size) +{ + return ZSTD_getCParams(level, estimated_src_size, dict_size); +} +EXPORT_SYMBOL(zstd_get_cparams); + size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) { return ZSTD_estimateCCtxSize_usingCParams(*cparams); @@ -93,6 +106,33 @@ zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) } EXPORT_SYMBOL(zstd_init_cctx); +zstd_cctx *zstd_create_cctx_advanced(zstd_custom_mem custom_mem) +{ + return ZSTD_createCCtx_advanced(custom_mem); +} +EXPORT_SYMBOL(zstd_create_cctx_advanced); + +size_t zstd_free_cctx(zstd_cctx *cctx) +{ + return ZSTD_freeCCtx(cctx); +} +EXPORT_SYMBOL(zstd_free_cctx); + +zstd_cdict *zstd_create_cdict_byreference(const void *dict, size_t dict_size, + zstd_compression_parameters cparams, + zstd_custom_mem custom_mem) +{ + return ZSTD_createCDict_advanced(dict, dict_size, ZSTD_dlm_byRef, + ZSTD_dct_auto, cparams, custom_mem); +} +EXPORT_SYMBOL(zstd_create_cdict_byreference); + +size_t zstd_free_cdict(zstd_cdict *cdict) +{ + return ZSTD_freeCDict(cdict); +} +EXPORT_SYMBOL(zstd_free_cdict); + size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, const void *src, size_t src_size, const zstd_parameters *parameters) { @@ -101,6 +141,15 @@ size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, } EXPORT_SYMBOL(zstd_compress_cctx); +size_t zstd_compress_using_cdict(zstd_cctx *cctx, void *dst, + size_t dst_capacity, const void *src, size_t src_size, + const ZSTD_CDict *cdict) +{ + return ZSTD_compress_usingCDict(cctx, dst, dst_capacity, + src, src_size, cdict); +} +EXPORT_SYMBOL(zstd_compress_using_cdict); + size_t zstd_cstream_workspace_bound(const 
zstd_compression_parameters *cparams) { return ZSTD_estimateCStreamSize_usingCParams(*cparams); diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c index f4ed952ed485..469fc3059be0 100644 --- a/lib/zstd/zstd_decompress_module.c +++ b/lib/zstd/zstd_decompress_module.c @@ -44,6 +44,33 @@ size_t zstd_dctx_workspace_bound(void) } EXPORT_SYMBOL(zstd_dctx_workspace_bound); +zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem) +{ + return ZSTD_createDCtx_advanced(custom_mem); +} +EXPORT_SYMBOL(zstd_create_dctx_advanced); + +size_t zstd_free_dctx(zstd_dctx *dctx) +{ + return ZSTD_freeDCtx(dctx); +} +EXPORT_SYMBOL(zstd_free_dctx); + +zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size, + zstd_custom_mem custom_mem) +{ + return ZSTD_createDDict_advanced(dict, dict_size, ZSTD_dlm_byRef, + ZSTD_dct_auto, custom_mem); + +} +EXPORT_SYMBOL(zstd_create_ddict_byreference); + +size_t zstd_free_ddict(zstd_ddict *ddict) +{ + return ZSTD_freeDDict(ddict); +} +EXPORT_SYMBOL(zstd_free_ddict); + zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) { if (workspace == NULL) @@ -59,6 +86,15 @@ size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, } EXPORT_SYMBOL(zstd_decompress_dctx); +size_t zstd_decompress_using_ddict(zstd_dctx *dctx, + void *dst, size_t dst_capacity, const void* src, size_t src_size, + const zstd_ddict* ddict) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dst_capacity, src, + src_size, ddict); +} +EXPORT_SYMBOL(zstd_decompress_using_ddict); + size_t zstd_dstream_workspace_bound(size_t max_window_size) { return ZSTD_estimateDStreamSize(max_window_size); |
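Taken together, the new zstd exports let kernel code build a compression dictionary once and reuse it across many buffers, with the dictionary kept by reference rather than copied. A hedged usage sketch: the wrapper functions come from the hunks above, while the allocator callbacks and the demo_ helper names are hypothetical and error handling is reduced to the bare minimum.

#include <linux/slab.h>
#include <linux/zstd.h>

/* Hypothetical allocator callbacks for the custom_mem interface. */
static void *demo_zstd_alloc(void *opaque, size_t size)
{
	return kvmalloc(size, GFP_KERNEL);
}

static void demo_zstd_free(void *opaque, void *address)
{
	kvfree(address);
}

static size_t demo_compress_with_dict(void *dst, size_t dst_len,
				      const void *src, size_t src_len,
				      const void *dict, size_t dict_len)
{
	zstd_custom_mem mem = {
		.customAlloc = demo_zstd_alloc,
		.customFree = demo_zstd_free,
	};
	zstd_compression_parameters cparams =
		zstd_get_cparams(zstd_default_clevel(), src_len, dict_len);
	zstd_cctx *cctx = zstd_create_cctx_advanced(mem);
	/*
	 * By-reference: the dictionary buffer is not copied, so it must
	 * stay alive for as long as the cdict is in use.
	 */
	zstd_cdict *cdict = zstd_create_cdict_byreference(dict, dict_len,
							  cparams, mem);
	size_t ret = 0;		/* 0 doubles as "failed" in this sketch */

	if (cctx && cdict)
		ret = zstd_compress_using_cdict(cctx, dst, dst_len,
						src, src_len, cdict);

	zstd_free_cdict(cdict);	/* the free helpers accept NULL */
	zstd_free_cctx(cctx);
	return ret;
}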