Merge Linus master into drm-next

The merge is clean, but the arm build fails afterwards, due to API changes in the regulator tree. I've included the patch into the merge to fix the build. Signed-off-by: Dave Airlie <airlied@redhat.com>
author: Dave Airlie <airlied@redhat.com> 2015-04-20 11:32:26 +1000
committer: Dave Airlie <airlied@redhat.com> 2015-04-20 13:05:20 +1000
commit: 2c33ce009ca2389dbf0535d0672214d09738e35e (patch)
tree: 6186a6458c3c160385d794a23eaf07c786a9e61b /lib
parent: cec32a47010647e8b0603726ebb75b990a4057a4 (diff)
parent: 09d51602cf84a1264946711dd4ea0dddbac599a1 (diff)
download: linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.gz
linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.bz2
linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.zip
25 files changed, 1424 insertions, 1390 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 87da53bb1fef..f5440221d929 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE
 	default n
 	depends on BITREVERSE
 	help
-	  This option provides an config for the architecture which have instruction
-	  can do bitreverse operation, we use the hardware instruction if the architecture
-	  have this capability.
+	  This option enables the use of hardware bit-reversal instructions on
+	  architectures which support such operations.
 
 config RATIONAL
 	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..17670573dda8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
 	  data corruption or a sporadic crash at a later stage once the region
 	  is examined. The runtime overhead introduced is minimal.
 
+config DEBUG_TIMEKEEPING
+	bool "Enable extra timekeeping sanity checking"
+	help
+	  This option will enable additional timekeeping sanity checks
+	  which may be helpful when diagnosing issues where timekeeping
+	  problems are suspected.
+
+	  This may include checks in the timekeeping hotpaths, so this
+	  option may have a (very small) performance impact to some
+	  workloads.
+
+	  If unsure, say N.
+
 config TIMER_STATS
 	bool "Collect kernel timers statistics"
 	depends on DEBUG_KERNEL && PROC_FS
@@ -1180,16 +1193,7 @@ config DEBUG_CREDENTIALS
 menu "RCU Debugging"
 
 config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	 This feature enables lockdep extensions that check for correct
-	 use of RCU APIs.  This is currently under development.  Say Y
-	 if you want to debug RCU usage or help work on the PROVE_RCU
-	 feature.
-
-	 Say N if you are unsure.
+	def_bool PROVE_LOCKING
 
 config PROVE_RCU_REPEATEDLY
 	bool "RCU debugging: don't disable PROVE_RCU on first splat"
@@ -1257,6 +1261,30 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_TORTURE_TEST_SLOW_INIT
+	bool "Slow down RCU grace-period initialization to expose races"
+	depends on RCU_TORTURE_TEST
+	help
+	  This option makes grace-period initialization block for a
+	  few jiffies between initializing each pair of consecutive
+	  rcu_node structures.	This helps to expose races involving
+	  grace-period initialization, in other words, it makes your
+	  kernel less stable.  It can also greatly increase grace-period
+	  latency, especially on systems with large numbers of CPUs.
+	  This is useful when torture-testing RCU, but in almost no
+	  other circumstance.
+
+	  Say Y here if you want your system to crash and hang more often.
+	  Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_INIT_DELAY
+	int "How much to slow down RCU grace-period initialization"
+	range 0 5
+	default 3
+	help
+	  This option specifies the number of jiffies to wait between
+	  each rcu_node structure initialization.
+
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
 	depends on RCU_STALL_COMMON
@@ -1732,6 +1760,18 @@ config TEST_UDELAY
 
 	  If unsure, say N.
 
+config MEMTEST
+	bool "Memtest"
+	depends on HAVE_MEMBLOCK
+	---help---
+	  This option adds a kernel parameter 'memtest', which allows memtest
+	  to be set.
+	        memtest=0, mean disabled; -- default
+	        memtest=1, mean do 1 test pattern;
+	        ...
+	        memtest=17, mean do 17 test patterns.
+	  If you are unsure how to answer this question, answer N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 58f74d2dd396..6c37933336a0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,7 +25,7 @@ obj-y	+= lockref.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
-	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index d456f4c15a9f..64c0926f5dd8 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -42,36 +42,6 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_empty);
-
-int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
-{
-	unsigned int k, lim = bits/BITS_PER_LONG;
-	for (k = 0; k < lim; ++k)
-		if (~bitmap[k])
-			return 0;
-
-	if (bits % BITS_PER_LONG)
-		if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL(__bitmap_full);
-
 int __bitmap_equal(const unsigned long *bitmap1,
 		const unsigned long *bitmap2, unsigned int bits)
 {
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b6513a9f2892..5ab1553fd076 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -37,10 +37,11 @@ EXPORT_SYMBOL(__next_cpu_nr);
 int cpumask_next_and(int n, const struct cpumask *src1p,
 		     const struct cpumask *src2p)
 {
-	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
-		if (cpumask_test_cpu(n, src2p))
-			break;
-	return n;
+	struct cpumask tmp;
+
+	if (cpumask_and(&tmp, src1p, src2p))
+		return cpumask_next(n, &tmp);
+	return nr_cpu_ids;
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
diff --git a/lib/div64.c b/lib/div64.c
index 4382ad77777e..19ea7ed4b948 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(div64_u64_rem);
  * by the book 'Hacker's Delight'.  The original source and full proof
  * can be found here and is available for use without restriction.
  *
- * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
  */
 #ifndef div64_u64
 u64 div64_u64(u64 dividend, u64 divisor)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9722bd2dbc9b..ae4b65e17e64 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
 	unsigned int range = 0;
 
 	while (range <= max_range) {
-		entry = __hash_bucket_find(*bucket, &index, containing_match);
+		entry = __hash_bucket_find(*bucket, ref, containing_match);
 
 		if (entry)
 			return entry;
diff --git a/lib/find_bit.c b/lib/find_bit.c
new file mode 100644
index 000000000000..18072ea9c20e
--- /dev/null
+++ b/lib/find_bit.c
@@ -0,0 +1,193 @@
+/* bit search implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit.  The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for one bits.
+ */
+static unsigned long _find_next_bit(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, unsigned long invert)
+{
+	unsigned long tmp;
+
+	if (!nbits || start >= nbits)
+		return nbits;
+
+	tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+	/* Handle 1st word. */
+	tmp &= BITMAP_FIRST_WORD_MASK(start);
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
+	}
+
+	return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	return _find_next_bit(addr, size, offset, 0UL);
+}
+EXPORT_SYMBOL(find_next_bit);
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	return _find_next_bit(addr, size, offset, ~0UL);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx])
+			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_bit);
+#endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		if (addr[idx] != ~0UL)
+			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(find_first_zero_bit);
+#endif
+
+#ifndef find_last_bit
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
+
+		do {
+			val &= addr[idx];
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
+
+			val = ~0ul;
+		} while (idx--);
+	}
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
+#endif
+
+#ifdef __BIG_ENDIAN
+
+/* include/linux/byteorder does not support "unsigned long" type */
+static inline unsigned long ext2_swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+	return (unsigned long) __swab64((u64) y);
+#elif BITS_PER_LONG == 32
+	return (unsigned long) __swab32((u32) y);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+		unsigned long nbits, unsigned long start, unsigned long invert)
+{
+	unsigned long tmp;
+
+	if (!nbits || start >= nbits)
+		return nbits;
+
+	tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+	/* Handle 1st word. */
+	tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
+	start = round_down(start, BITS_PER_LONG);
+
+	while (!tmp) {
+		start += BITS_PER_LONG;
+		if (start >= nbits)
+			return nbits;
+
+		tmp = addr[start / BITS_PER_LONG] ^ invert;
+	}
+
+	return min(start + __ffs(ext2_swab(tmp)), nbits);
+}
+#endif
+
+#ifndef find_next_zero_bit_le
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	return _find_next_bit_le(addr, size, offset, ~0UL);
+}
+EXPORT_SYMBOL(find_next_zero_bit_le);
+#endif
+
+#ifndef find_next_bit_le
+unsigned long find_next_bit_le(const void *addr, unsigned
+		long size, unsigned long offset)
+{
+	return _find_next_bit_le(addr, size, offset, 0UL);
+}
+EXPORT_SYMBOL(find_next_bit_le);
+#endif
+
+#endif /* __BIG_ENDIAN */
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09fbf6f9..3e3be40c6a6e 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -4,6 +4,9 @@
  * Written by Rusty Russell <rusty@rustcorp.com.au>
  * (Inspired by David Howell's find_next_bit implementation)
  *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -11,37 +14,26 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #ifndef find_last_bit
 
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long words;
-	unsigned long tmp;
-
-	/* Start at final word. */
-	words = size / BITS_PER_LONG;
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
 
-	/* Partial final word? */
-	if (size & (BITS_PER_LONG-1)) {
-		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-					 - (size & (BITS_PER_LONG-1)))));
-		if (tmp)
-			goto found;
-	}
+		do {
+			val &= addr[idx];
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
 
-	while (words) {
-		tmp = addr[--words];
-		if (tmp) {
-found:
-			return words * BITS_PER_LONG + __fls(tmp);
-		}
+			val = ~0ul;
+		} while (idx--);
 	}
-
-	/* Not found */
 	return size;
 }
 EXPORT_SYMBOL(find_last_bit);
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
deleted file mode 100644
index 0cbfc0b4398f..000000000000
--- a/lib/find_next_bit.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/* find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-			    unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(find_next_bit);
-#endif
-
-#ifndef find_next_zero_bit
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-				 unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (BITS_PER_LONG - offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffz(tmp);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
-#endif
-
-#ifndef find_first_bit
-/*
- * Find the first set bit in a memory region.
- */
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(find_first_bit);
-#endif
-
-#ifndef find_first_zero_bit
-/*
- * Find the first cleared bit in a memory region.
- */
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
-	const unsigned long *p = addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if (~(tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) | (~0UL << size);
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size;	/* Nope. */
-found:
-	return result + ffz(tmp);
-}
-EXPORT_SYMBOL(find_first_zero_bit);
-#endif
-
-#ifdef __BIG_ENDIAN
-
-/* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swabp(const unsigned long * x)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64p((u64 *) x);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32p((u32 *) x);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-/* include/linux/byteorder doesn't support "unsigned long" type */
-static inline unsigned long ext2_swab(const unsigned long y)
-{
-#if BITS_PER_LONG == 64
-	return (unsigned long) __swab64((u64) y);
-#elif BITS_PER_LONG == 32
-	return (unsigned long) __swab32((u32) y);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-#ifndef find_next_zero_bit_le
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp |= (~0UL >> (BITS_PER_LONG - offset));
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		if (~(tmp = *(p++)))
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)	/* Are any bits zero? */
-		return result + size; /* Nope. Skip ffz */
-found_middle:
-	return result + ffz(tmp);
-
-found_middle_swap:
-	return result + ffz(ext2_swab(tmp));
-}
-EXPORT_SYMBOL(find_next_zero_bit_le);
-#endif
-
-#ifndef find_next_bit_le
-unsigned long find_next_bit_le(const void *addr, unsigned
-		long size, unsigned long offset)
-{
-	const unsigned long *p = addr;
-	unsigned long result = offset & ~(BITS_PER_LONG - 1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	p += BITOP_WORD(offset);
-	size -= result;
-	offset &= (BITS_PER_LONG - 1UL);
-	if (offset) {
-		tmp = ext2_swabp(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-
-	while (size & ~(BITS_PER_LONG - 1)) {
-		tmp = *(p++);
-		if (tmp)
-			goto found_middle_swap;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = ext2_swabp(p);
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size; /* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-
-found_middle_swap:
-	return result + __ffs(ext2_swab(tmp));
-}
-EXPORT_SYMBOL(find_next_bit_le);
-#endif
-
-#endif /* __BIG_ENDIAN */
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..a1a517cba7ec
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,270 @@
+/*
+ * IOMMU mmap management and range allocation functions.
+ * Based almost entirely upon the powerpc iommu allocator.
+ */
+
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/iommu-helper.h>
+#include <linux/iommu-common.h>
+#include <linux/dma-mapping.h>
+#include <linux/hash.h>
+
+#ifndef	DMA_ERROR_CODE
+#define	DMA_ERROR_CODE (~(dma_addr_t)0x0)
+#endif
+
+unsigned long iommu_large_alloc = 15;
+
+static	DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+	return (iommu->lazy_flush != NULL &&
+		(iommu->flags & IOMMU_NEED_FLUSH) != 0);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags |= IOMMU_NEED_FLUSH;
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags &= ~IOMMU_NEED_FLUSH;
+}
+
+static void setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+/*
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
+ * is the number of table entries. If `large_pool' is set to true,
+ * the top 1/4 of the table will be set aside for pool allocations
+ * of more than iommu_large_alloc pages.
+ */
+extern void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+				unsigned long num_entries,
+				u32 table_shift,
+				void (*lazy_flush)(struct iommu_map_table *),
+				bool large_pool, u32 npools,
+				bool skip_span_boundary_check)
+{
+	unsigned int start, i;
+	struct iommu_pool *p = &(iommu->large_pool);
+
+	setup_iommu_pool_hash();
+	if (npools == 0)
+		iommu->nr_pools = IOMMU_NR_POOLS;
+	else
+		iommu->nr_pools = npools;
+	BUG_ON(npools > IOMMU_NR_POOLS);
+
+	iommu->table_shift = table_shift;
+	iommu->lazy_flush = lazy_flush;
+	start = 0;
+	if (skip_span_boundary_check)
+		iommu->flags |= IOMMU_NO_SPAN_BOUND;
+	if (large_pool)
+		iommu->flags |= IOMMU_HAS_LARGE_POOL;
+
+	if (!large_pool)
+		iommu->poolsize = num_entries/iommu->nr_pools;
+	else
+		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
+	for (i = 0; i < iommu->nr_pools; i++) {
+		spin_lock_init(&(iommu->pools[i].lock));
+		iommu->pools[i].start = start;
+		iommu->pools[i].hint = start;
+		start += iommu->poolsize; /* start for next pool */
+		iommu->pools[i].end = start - 1;
+	}
+	if (!large_pool)
+		return;
+	/* initialize large_pool */
+	spin_lock_init(&(p->lock));
+	p->start = start;
+	p->hint = p->start;
+	p->end = num_entries;
+}
+EXPORT_SYMBOL(iommu_tbl_pool_init);
+
+unsigned long iommu_tbl_range_alloc(struct device *dev,
+				struct iommu_map_table *iommu,
+				unsigned long npages,
+				unsigned long *handle,
+				unsigned long mask,
+				unsigned int align_order)
+{
+	unsigned int pool_hash = __this_cpu_read(iommu_pool_hash);
+	unsigned long n, end, start, limit, boundary_size;
+	struct iommu_pool *pool;
+	int pass = 0;
+	unsigned int pool_nr;
+	unsigned int npools = iommu->nr_pools;
+	unsigned long flags;
+	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
+	bool largealloc = (large_pool && npages > iommu_large_alloc);
+	unsigned long shift;
+	unsigned long align_mask = 0;
+
+	if (align_order > 0)
+		align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		WARN_ON_ONCE(1);
+		return DMA_ERROR_CODE;
+	}
+
+	if (largealloc) {
+		pool = &(iommu->large_pool);
+		pool_nr = 0; /* to keep compiler happy */
+	} else {
+		/* pick out pool_nr */
+		pool_nr =  pool_hash & (npools - 1);
+		pool = &(iommu->pools[pool_nr]);
+	}
+	spin_lock_irqsave(&pool->lock, flags);
+
+ again:
+	if (pass == 0 && handle && *handle &&
+	    (*handle >= pool->start) && (*handle < pool->end))
+		start = *handle;
+	else
+		start = pool->hint;
+
+	limit = pool->end;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the beginning. If a
+	 * flush is needed, it will get done based on the return value
+	 * from iommu_area_alloc() below.
+	 */
+	if (start >= limit)
+		start = pool->start;
+	shift = iommu->table_map_base >> iommu->table_shift;
+	if (limit + shift > mask) {
+		limit = mask - shift + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(iommu->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
+	}
+
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << iommu->table_shift);
+	else
+		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
+
+	boundary_size = boundary_size >> iommu->table_shift;
+	/*
+	 * if the skip_span_boundary_check had been set during init, we set
+	 * things up so that iommu_is_span_boundary() merely checks if the
+	 * (index + npages) < num_tsb_entries
+	 */
+	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
+		shift = 0;
+		boundary_size = iommu->poolsize * iommu->nr_pools;
+	}
+	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
+			     boundary_size, align_mask);
+	if (n == -1) {
+		if (likely(pass == 0)) {
+			/* First failure, rescan from the beginning.  */
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else if (!largealloc && pass <= iommu->nr_pools) {
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
+			pool = &(iommu->pools[pool_nr]);
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else {
+			/* give up */
+			n = DMA_ERROR_CODE;
+			goto bail;
+		}
+	}
+	if (n < pool->hint || need_flush(iommu)) {
+		clear_flush(iommu);
+		iommu->lazy_flush(iommu);
+	}
+
+	end = n + npages;
+	pool->hint = end;
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+bail:
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
+	return n;
+}
+EXPORT_SYMBOL(iommu_tbl_range_alloc);
+
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
+				   unsigned long entry)
+{
+	struct iommu_pool *p;
+	unsigned long largepool_start = tbl->large_pool.start;
+	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
+
+	/* The large pool is the last pool at the top of the table */
+	if (large_pool && entry >= largepool_start) {
+		p = &tbl->large_pool;
+	} else {
+		unsigned int pool_nr = entry / tbl->poolsize;
+
+		BUG_ON(pool_nr >= tbl->nr_pools);
+		p = &tbl->pools[pool_nr];
+	}
+	return p;
+}
+
+/* Caller supplies the index of the entry into the iommu map table
+ * itself when the mapping from dma_addr to the entry is not the
+ * default addr->entry mapping below.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+			  unsigned long npages, unsigned long entry)
+{
+	struct iommu_pool *pool;
+	unsigned long flags;
+	unsigned long shift = iommu->table_shift;
+
+	if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
+		entry = (dma_addr - iommu->table_map_base) >> shift;
+	pool = get_pool(iommu, entry);
+
+	spin_lock_irqsave(&(pool->lock), flags);
+	bitmap_clear(iommu->map, entry, npages);
+	spin_unlock_irqrestore(&(pool->lock), flags);
+}
+EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 0c9216c48762..86c8911b0e3a 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,6 +13,43 @@
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static int __read_mostly ioremap_pud_capable;
+static int __read_mostly ioremap_pmd_capable;
+static int __read_mostly ioremap_huge_disabled;
+
+static int __init set_nohugeiomap(char *str)
+{
+	ioremap_huge_disabled = 1;
+	return 0;
+}
+early_param("nohugeiomap", set_nohugeiomap);
+
+void __init ioremap_huge_init(void)
+{
+	if (!ioremap_huge_disabled) {
+		if (arch_ioremap_pud_supported())
+			ioremap_pud_capable = 1;
+		if (arch_ioremap_pmd_supported())
+			ioremap_pmd_capable = 1;
+	}
+}
+
+static inline int ioremap_pud_enabled(void)
+{
+	return ioremap_pud_capable;
+}
+
+static inline int ioremap_pmd_enabled(void)
+{
+	return ioremap_pmd_capable;
+}
+
+#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int ioremap_pud_enabled(void) { return 0; }
+static inline int ioremap_pmd_enabled(void) { return 0; }
+#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
@@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
+
+		if (ioremap_pmd_enabled() &&
+		    ((next - addr) == PMD_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+			if (pmd_set_huge(pmd, phys_addr + addr, prot))
+				continue;
+		}
+
 		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
@@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
+
+		if (ioremap_pud_enabled() &&
+		    ((next - addr) == PUD_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+			if (pud_set_huge(pud, phys_addr + addr, prot))
+				continue;
+		}
+
 		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9d96e283520c..75232ad0a5e7 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -317,6 +317,32 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
+/*
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+ * bytes.  For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+ * because it is an invalid address).
+ */
+int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+{
+	size_t skip = i->iov_offset;
+	const struct iovec *iov;
+	int err;
+	struct iovec v;
+
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+		iterate_iovec(i, bytes, v, iov, skip, ({
+			err = fault_in_multipages_readable(v.iov_base,
+					v.iov_len);
+			if (unlikely(err))
+			return err;
+		0;}))
+	}
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
@@ -766,3 +792,60 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 				   flags);
 }
 EXPORT_SYMBOL(dup_iter);
+
+int import_iovec(int type, const struct iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+EXPORT_SYMBOL(import_iovec);
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+		 unsigned nr_segs, unsigned fast_segs,
+		 struct iovec **iov, struct iov_iter *i)
+{
+	ssize_t n;
+	struct iovec *p;
+	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
+				  *iov, &p);
+	if (n < 0) {
+		if (p != *iov)
+			kfree(p);
+		*iov = NULL;
+		return n;
+	}
+	iov_iter_init(i, type, p, nr_segs, n);
+	*iov = p == *iov ? NULL : p;
+	return 0;
+}
+#endif
+
+int import_single_range(int rw, void __user *buf, size_t len,
+		 struct iovec *iov, struct iov_iter *i)
+{
+	if (len > MAX_RW_COUNT)
+		len = MAX_RW_COUNT;
+	if (unlikely(!access_ok(!rw, buf, len)))
+		return -EFAULT;
+
+	iov->iov_base = buf;
+	iov->iov_len = len;
+	iov_iter_init(i, rw, iov, 1, len);
+	return 0;
+}
diff --git a/lib/kobject.c b/lib/kobject.c
index 03d4ab349fa7..3b841b97fccd 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -576,8 +576,13 @@ void kobject_del(struct kobject *kobj)
  */
 struct kobject *kobject_get(struct kobject *kobj)
 {
-	if (kobj)
+	if (kobj) {
+		if (!kobj->state_initialized)
+			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
+			       "initialized, yet kobject_get() is being "
+			       "called.\n", kobject_name(kobj), kobj);
 		kref_get(&kobj->kref);
+	}
 	return kobj;
 }
 
diff --git a/lib/lockref.c b/lib/lockref.c
index ecb9a665ec19..494994bf17c8 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -18,7 +18,7 @@
 #define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
 	struct lockref old;							\
 	BUILD_BUG_ON(sizeof(old) != 8);						\
-	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	old.lock_count = READ_ONCE(lockref->lock_count);			\
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {  	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 852c81e3ba9a..028f5d996eef 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -247,10 +247,11 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
 	 * progress) and "changed", when this in fact lead to an successful
 	 * update of the cache.
 	 */
-	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
-		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
+	seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
+		   lc->name, lc->used, lc->nr_elements,
+		   lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
+
+	return 0;
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index f0f5c5c3de12..26cc6029b280 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -47,6 +47,11 @@
 
 #include "lz4defs.h"
 
+static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+#endif
+
 static int lz4_uncompress(const char *source, char *dest, int osize)
 {
 	const BYTE *ip = (const BYTE *) source;
@@ -56,10 +61,6 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
 	BYTE *cpy;
 	unsigned token;
 	size_t length;
-	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-	size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
 
 	while (1) {
 
@@ -116,7 +117,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
 		/* copy repeated sequence */
 		if (unlikely((op - ref) < STEPSIZE)) {
 #if LZ4_ARCH64
-			size_t dec64 = dec64table[op - ref];
+			int dec64 = dec64table[op - ref];
 #else
 			const int dec64 = 0;
 #endif
@@ -177,11 +178,6 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
 	BYTE * const oend = op + maxoutputsize;
 	BYTE *cpy;
 
-	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-	size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
 	/* Main Loop */
 	while (ip < iend) {
 
@@ -249,7 +245,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
 		/* copy repeated sequence */
 		if (unlikely((op - ref) < STEPSIZE)) {
 #if LZ4_ARCH64
-			size_t dec64 = dec64table[op - ref];
+			int dec64 = dec64table[op - ref];
 #else
 			const int dec64 = 0;
 #endif
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index b5344ef4c684..4898442b837f 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,13 +1,13 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
- * Based on the following paper:
- * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
- *
  * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -27,121 +27,18 @@
 #include <linux/err.h>
 
 #define HASH_DEFAULT_SIZE	64UL
-#define HASH_MIN_SIZE		4UL
+#define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU   128UL
 
-/* Base bits plus 1 bit for nulls marker */
-#define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
-
-enum {
-	RHT_LOCK_NORMAL,
-	RHT_LOCK_NESTED,
-};
-
-/* The bucket lock is selected based on the hash and protects mutations
- * on a group of hash buckets.
- *
- * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
- * a single lock always covers both buckets which may both contains
- * entries which link to the same bucket of the old table during resizing.
- * This allows to simplify the locking as locking the bucket in both
- * tables during resize always guarantee protection.
- *
- * IMPORTANT: When holding the bucket lock of both the old and new table
- * during expansions and shrinking, the old bucket lock must always be
- * acquired first.
- */
-static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
-{
-	return &tbl->locks[hash & tbl->locks_mask];
-}
-
-static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
-{
-	return (void *) he - ht->p.head_offset;
-}
-
-static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
-{
-	return hash & (tbl->size - 1);
-}
-
-static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
-{
-	u32 hash;
-
-	if (unlikely(!ht->p.key_len))
-		hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
-	else
-		hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
-				    ht->p.hash_rnd);
-
-	return hash >> HASH_RESERVED_SPACE;
-}
-
-static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
-{
-	return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
-}
-
-static u32 head_hashfn(const struct rhashtable *ht,
+static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
 {
-	return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
+	return rht_head_hashfn(ht, tbl, he, ht->p);
 }
 
 #ifdef CONFIG_PROVE_LOCKING
-static void debug_dump_buckets(const struct rhashtable *ht,
-			       const struct bucket_table *tbl)
-{
-	struct rhash_head *he;
-	unsigned int i, hash;
-
-	for (i = 0; i < tbl->size; i++) {
-		pr_warn(" [Bucket %d] ", i);
-		rht_for_each_rcu(he, tbl, i) {
-			hash = head_hashfn(ht, tbl, he);
-			pr_cont("[hash = %#x, lock = %p] ",
-				hash, bucket_lock(tbl, hash));
-		}
-		pr_cont("\n");
-	}
-
-}
-
-static void debug_dump_table(struct rhashtable *ht,
-			     const struct bucket_table *tbl,
-			     unsigned int hash)
-{
-	struct bucket_table *old_tbl, *future_tbl;
-
-	pr_emerg("BUG: lock for hash %#x in table %p not held\n",
-		 hash, tbl);
-
-	rcu_read_lock();
-	future_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	if (future_tbl != old_tbl) {
-		pr_warn("Future table %p (size: %zd)\n",
-			future_tbl, future_tbl->size);
-		debug_dump_buckets(ht, future_tbl);
-	}
-
-	pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size);
-	debug_dump_buckets(ht, old_tbl);
-
-	rcu_read_unlock();
-}
-
 #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
-#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)				\
-	do {								\
-		if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) {	\
-			debug_dump_table(HT, TBL, HASH);		\
-			BUG();						\
-		}							\
-	} while (0)
 
 int lockdep_rht_mutex_is_held(struct rhashtable *ht)
 {
@@ -151,30 +48,18 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
 
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
 {
-	spinlock_t *lock = bucket_lock(tbl, hash);
+	spinlock_t *lock = rht_bucket_lock(tbl, hash);
 
 	return (debug_locks) ? lockdep_is_held(lock) : 1;
 }
 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #else
 #define ASSERT_RHT_MUTEX(HT)
-#define ASSERT_BUCKET_LOCK(HT, TBL, HASH)
 #endif
 
 
-static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
-{
-	struct rhash_head __rcu **pprev;
-
-	for (pprev = &tbl->buckets[n];
-	     !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
-	     pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
-		;
-
-	return pprev;
-}
-
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
+static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
+			      gfp_t gfp)
 {
 	unsigned int i, size;
 #if defined(CONFIG_PROVE_LOCKING)
@@ -191,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
-		if (size * sizeof(spinlock_t) > PAGE_SIZE)
+		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
+		    gfp == GFP_KERNEL)
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
 		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-					   GFP_KERNEL);
+					   gfp);
 		if (!tbl->locks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
@@ -215,153 +101,181 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets)
+					       size_t nbuckets,
+					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (tbl == NULL)
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
+	    gfp != GFP_KERNEL)
+		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
+	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
 	if (tbl == NULL)
 		return NULL;
 
 	tbl->size = nbuckets;
 
-	if (alloc_bucket_locks(ht, tbl) < 0) {
+	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&tbl->walkers);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
 
 	return tbl;
 }
 
-/**
- * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
- * @ht:		hash table
- * @new_size:	new table size
- */
-static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+						  struct bucket_table *tbl)
 {
-	/* Expand table when exceeding 75% load */
-	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
-	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
-}
+	struct bucket_table *new_tbl;
 
-/**
- * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
- * @ht:		hash table
- * @new_size:	new table size
- */
-static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
-{
-	/* Shrink table beneath 30% load */
-	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
-	       (atomic_read(&ht->shift) > ht->p.min_shift);
-}
+	do {
+		new_tbl = tbl;
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	} while (tbl);
 
-static void lock_buckets(struct bucket_table *new_tbl,
-			 struct bucket_table *old_tbl, unsigned int hash)
-	__acquires(old_bucket_lock)
-{
-	spin_lock_bh(bucket_lock(old_tbl, hash));
-	if (new_tbl != old_tbl)
-		spin_lock_bh_nested(bucket_lock(new_tbl, hash),
-				    RHT_LOCK_NESTED);
+	return new_tbl;
 }
 
-static void unlock_buckets(struct bucket_table *new_tbl,
-			   struct bucket_table *old_tbl, unsigned int hash)
-	__releases(old_bucket_lock)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
-	if (new_tbl != old_tbl)
-		spin_unlock_bh(bucket_lock(new_tbl, hash));
-	spin_unlock_bh(bucket_lock(old_tbl, hash));
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl = rhashtable_last_table(ht,
+		rht_dereference_rcu(old_tbl->future_tbl, ht));
+	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
+	int err = -ENOENT;
+	struct rhash_head *head, *next, *entry;
+	spinlock_t *new_bucket_lock;
+	unsigned int new_hash;
+
+	rht_for_each(entry, old_tbl, old_hash) {
+		err = 0;
+		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
+
+		if (rht_is_a_nulls(next))
+			break;
+
+		pprev = &entry->next;
+	}
+
+	if (err)
+		goto out;
+
+	new_hash = head_hashfn(ht, new_tbl, entry);
+
+	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+
+	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
+	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
+				      new_tbl, new_hash);
+
+	if (rht_is_a_nulls(head))
+		INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash);
+	else
+		RCU_INIT_POINTER(entry->next, head);
+
+	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
+	spin_unlock(new_bucket_lock);
+
+	rcu_assign_pointer(*pprev, next);
+
+out:
+	return err;
 }
 
-/**
- * Unlink entries on bucket which hash to different bucket.
- *
- * Returns true if no more work needs to be performed on the bucket.
- */
-static bool hashtable_chain_unzip(struct rhashtable *ht,
-				  const struct bucket_table *new_tbl,
-				  struct bucket_table *old_tbl,
-				  size_t old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
-	struct rhash_head *he, *p, *next;
-	unsigned int new_hash, new_hash2;
-
-	ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	spinlock_t *old_bucket_lock;
 
-	/* Old bucket empty, no work needed. */
-	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
-				   old_hash);
-	if (rht_is_a_nulls(p))
-		return false;
+	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
-	new_hash = head_hashfn(ht, new_tbl, p);
-	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
+	spin_lock_bh(old_bucket_lock);
+	while (!rhashtable_rehash_one(ht, old_hash))
+		;
+	old_tbl->rehash++;
+	spin_unlock_bh(old_bucket_lock);
+}
 
-	/* Advance the old bucket pointer one or more times until it
-	 * reaches a node that doesn't hash to the same bucket as the
-	 * previous node p. Call the previous node p;
-	 */
-	rht_for_each_continue(he, p->next, old_tbl, old_hash) {
-		new_hash2 = head_hashfn(ht, new_tbl, he);
-		ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2);
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+				    struct bucket_table *old_tbl,
+				    struct bucket_table *new_tbl)
+{
+	/* Protect future_tbl using the first bucket lock. */
+	spin_lock_bh(old_tbl->locks);
 
-		if (new_hash != new_hash2)
-			break;
-		p = he;
+	/* Did somebody beat us to it? */
+	if (rcu_access_pointer(old_tbl->future_tbl)) {
+		spin_unlock_bh(old_tbl->locks);
+		return -EEXIST;
 	}
-	rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
 
-	/* Find the subsequent node which does hash to the same
-	 * bucket as node P, or NULL if no such node exists.
+	/* Make insertions go into the new, empty table right away. Deletions
+	 * and lookups will be attempted in both tables until we synchronize.
 	 */
-	INIT_RHT_NULLS_HEAD(next, ht, old_hash);
-	if (!rht_is_a_nulls(he)) {
-		rht_for_each_continue(he, he->next, old_tbl, old_hash) {
-			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				next = he;
-				break;
-			}
-		}
-	}
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
-	/* Set p's next pointer to that subsequent node pointer,
-	 * bypassing the nodes which do not hash to p's bucket
-	 */
-	rcu_assign_pointer(p->next, next);
+	/* Ensure the new table is visible to readers. */
+	smp_wmb();
 
-	p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
-				   old_hash);
+	spin_unlock_bh(old_tbl->locks);
 
-	return !rht_is_a_nulls(p);
+	return 0;
 }
 
-static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
-			    unsigned int new_hash, struct rhash_head *entry)
+static int rhashtable_rehash_table(struct rhashtable *ht)
 {
-	ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
+	struct rhashtable_walker *walker;
+	unsigned int old_hash;
+
+	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+	if (!new_tbl)
+		return 0;
+
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+		rhashtable_rehash_chain(ht, old_hash);
+
+	/* Publish the new table pointer. */
+	rcu_assign_pointer(ht->tbl, new_tbl);
+
+	spin_lock(&ht->lock);
+	list_for_each_entry(walker, &old_tbl->walkers, list)
+		walker->tbl = NULL;
+	spin_unlock(&ht->lock);
 
-	rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
+	/* Wait for readers. All new readers will see the new
+	 * table, and thus no references to the old table will
+	 * remain.
+	 */
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+
+	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
 /**
  * rhashtable_expand - Expand hash table while allowing concurrent lookups
  * @ht:		the hash table to expand
  *
- * A secondary bucket array is allocated and the hash entries are migrated
- * while keeping them on both lists until the end of the RCU grace period.
+ * A secondary bucket array is allocated and the hash entries are migrated.
  *
  * This function may only be called in a context where it is safe to call
  * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
@@ -372,89 +286,32 @@ static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl,
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	struct rhash_head *he;
-	unsigned int new_hash, old_hash;
-	bool complete = false;
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
+	old_tbl = rhashtable_last_table(ht, old_tbl);
+
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	atomic_inc(&ht->shift);
-
-	/* Make insertions go into the new, empty table right away. Deletions
-	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
-	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
-	synchronize_rcu();
-
-	/* For each new bucket, search the corresponding old bucket for the
-	 * first entry that hashes to the new bucket, and link the end of
-	 * newly formed bucket chain (containing entries added to future
-	 * table) to that entry. Since all the entries which will end up in
-	 * the new bucket appear in the same old bucket, this constructs an
-	 * entirely valid new hash table, but with multiple buckets
-	 * "zipped" together into a single imprecise chain.
-	 */
-	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		old_hash = rht_bucket_index(old_tbl, new_hash);
-		lock_buckets(new_tbl, old_tbl, new_hash);
-		rht_for_each(he, old_tbl, old_hash) {
-			if (head_hashfn(ht, new_tbl, he) == new_hash) {
-				link_old_to_new(ht, new_tbl, new_hash, he);
-				break;
-			}
-		}
-		unlock_buckets(new_tbl, old_tbl, new_hash);
-		cond_resched();
-	}
-
-	/* Unzip interleaved hash chains */
-	while (!complete && !ht->being_destroyed) {
-		/* Wait for readers. All new readers will see the new
-		 * table, and thus no references to the old table will
-		 * remain.
-		 */
-		synchronize_rcu();
-
-		/* For each bucket in the old table (each of which
-		 * contains items from multiple buckets of the new
-		 * table): ...
-		 */
-		complete = true;
-		for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-			lock_buckets(new_tbl, old_tbl, old_hash);
-
-			if (hashtable_chain_unzip(ht, new_tbl, old_tbl,
-						  old_hash))
-				complete = false;
-
-			unlock_buckets(new_tbl, old_tbl, old_hash);
-			cond_resched();
-		}
-	}
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
 
-	rcu_assign_pointer(ht->tbl, new_tbl);
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
-	return 0;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_expand);
 
 /**
  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
  * @ht:		the hash table to shrink
  *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ * This function shrinks the hash table to fit, i.e., the smallest
+ * size would not cause it to expand right away automatically.
  *
  * The caller must ensure that no concurrent resizing occurs by holding
  * ht->mutex.
@@ -465,395 +322,146 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_shrink(struct rhashtable *ht)
+static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
-	unsigned int new_hash;
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	unsigned int size;
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, tbl->size / 2);
-	if (new_tbl == NULL)
-		return -ENOMEM;
-
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
-	synchronize_rcu();
-
-	/* Link the first entry in the old bucket to the end of the
-	 * bucket in the new table. As entries are concurrently being
-	 * added to the new table, lock down the new bucket. As we
-	 * always divide the size in half when shrinking, each bucket
-	 * in the new table maps to exactly two buckets in the old
-	 * table.
-	 */
-	for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
-		lock_buckets(new_tbl, tbl, new_hash);
-
-		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
-				   tbl->buckets[new_hash]);
-		ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size);
-		rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
-				   tbl->buckets[new_hash + new_tbl->size]);
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	if (size < ht->p.min_size)
+		size = ht->p.min_size;
 
-		unlock_buckets(new_tbl, tbl, new_hash);
-		cond_resched();
-	}
+	if (old_tbl->size <= size)
+		return 0;
 
-	/* Publish the new, valid hash table */
-	rcu_assign_pointer(ht->tbl, new_tbl);
-	atomic_dec(&ht->shift);
+	if (rht_dereference(old_tbl->future_tbl, ht))
+		return -EEXIST;
 
-	/* Wait for readers. No new readers will have references to the
-	 * old hash table.
-	 */
-	synchronize_rcu();
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+	if (new_tbl == NULL)
+		return -ENOMEM;
 
-	bucket_table_free(tbl);
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
 
-	return 0;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_shrink);
 
 static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
-	struct rhashtable_walker *walker;
+	int err = 0;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, tbl);
 
-	list_for_each_entry(walker, &ht->walkers, list)
-		walker->resize = true;
-
-	if (rht_grow_above_75(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl->size))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
-unlock:
-	mutex_unlock(&ht->mutex);
-}
 
-static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				struct bucket_table *tbl,
-				const struct bucket_table *old_tbl, u32 hash)
-{
-	bool no_resize_running = tbl == old_tbl;
-	struct rhash_head *head;
+	err = rhashtable_rehash_table(ht);
 
-	hash = rht_bucket_index(tbl, hash);
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
-
-	ASSERT_BUCKET_LOCK(ht, tbl, hash);
-
-	if (rht_is_a_nulls(head))
-		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
-	else
-		RCU_INIT_POINTER(obj->next, head);
-
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	mutex_unlock(&ht->mutex);
 
-	atomic_inc(&ht->nelems);
-	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+	if (err)
 		schedule_work(&ht->run_work);
 }
 
-/**
- * rhashtable_insert - insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Will take a per bucket spinlock to protect against mutual mutations
- * on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
+static bool rhashtable_check_elasticity(struct rhashtable *ht,
+					struct bucket_table *tbl,
+					unsigned int hash)
 {
-	struct bucket_table *tbl, *old_tbl;
-	unsigned hash;
-
-	rcu_read_lock();
-
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+	unsigned int elasticity = ht->elasticity;
+	struct rhash_head *head;
 
-	lock_buckets(tbl, old_tbl, hash);
-	__rhashtable_insert(ht, obj, tbl, old_tbl, hash);
-	unlock_buckets(tbl, old_tbl, hash);
+	rht_for_each(head, tbl, hash)
+		if (!--elasticity)
+			return true;
 
-	rcu_read_unlock();
+	return false;
 }
-EXPORT_SYMBOL_GPL(rhashtable_insert);
 
-/**
- * rhashtable_remove - remove object from hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Since the hash chain is single linked, the removal operation needs to
- * walk the bucket chain upon removal. The removal operation is thus
- * considerable slow if the hash table is not correctly sized.
- *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
- *
- * The caller must ensure that no concurrent table mutations occur. It is
- * however valid to have concurrent lookups if they are RCU protected.
- */
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
+int rhashtable_insert_rehash(struct rhashtable *ht)
 {
-	struct bucket_table *tbl, *new_tbl, *old_tbl;
-	struct rhash_head __rcu **pprev;
-	struct rhash_head *he, *he2;
-	unsigned int hash, new_hash;
-	bool ret = false;
+	struct bucket_table *old_tbl;
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int size;
+	int err;
 
-	rcu_read_lock();
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
-
-	lock_buckets(new_tbl, old_tbl, new_hash);
-restart:
-	hash = rht_bucket_index(tbl, new_hash);
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
-		if (he != obj) {
-			pprev = &he->next;
-			continue;
-		}
-
-		ASSERT_BUCKET_LOCK(ht, tbl, hash);
-
-		if (old_tbl->size > new_tbl->size && tbl == old_tbl &&
-		    !rht_is_a_nulls(obj->next) &&
-		    head_hashfn(ht, tbl, obj->next) != hash) {
-			rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
-		} else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) {
-			rht_for_each_continue(he2, obj->next, tbl, hash) {
-				if (head_hashfn(ht, tbl, he2) == hash) {
-					rcu_assign_pointer(*pprev, he2);
-					goto found;
-				}
-			}
-
-			rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash));
-		} else {
-			rcu_assign_pointer(*pprev, obj->next);
-		}
-
-found:
-		ret = true;
-		break;
-	}
-
-	/* The entry may be linked in either 'tbl', 'future_tbl', or both.
-	 * 'future_tbl' only exists for a short period of time during
-	 * resizing. Thus traversing both is fine and the added cost is
-	 * very rare.
-	 */
-	if (tbl != old_tbl) {
-		tbl = old_tbl;
-		goto restart;
-	}
-
-	unlock_buckets(new_tbl, old_tbl, new_hash);
-
-	if (ret) {
-		bool no_resize_running = new_tbl == old_tbl;
-
-		atomic_dec(&ht->nelems);
-		if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
-			schedule_work(&ht->run_work);
-	}
-
-	rcu_read_unlock();
+	tbl = rhashtable_last_table(ht, old_tbl);
 
-	return ret;
-}
-EXPORT_SYMBOL_GPL(rhashtable_remove);
-
-struct rhashtable_compare_arg {
-	struct rhashtable *ht;
-	const void *key;
-};
-
-static bool rhashtable_compare(void *ptr, void *arg)
-{
-	struct rhashtable_compare_arg *x = arg;
-	struct rhashtable *ht = x->ht;
-
-	return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
-}
-
-/**
- * rhashtable_lookup - lookup key in hash table
- * @ht:		hash table
- * @key:	pointer to key
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- */
-void *rhashtable_lookup(struct rhashtable *ht, const void *key)
-{
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = key,
-	};
-
-	BUG_ON(!ht->p.key_len);
-
-	return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup);
-
-/**
- * rhashtable_lookup_compare - search hash table with compare function
- * @ht:		hash table
- * @key:	the pointer to the key
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Traverses the bucket chain behind the provided hash value and calls the
- * specified compare function for each entry.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Returns the first entry on which the compare function returned true.
- */
-void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
-				bool (*compare)(void *, void *), void *arg)
-{
-	const struct bucket_table *tbl, *old_tbl;
-	struct rhash_head *he;
-	u32 hash;
+	size = tbl->size;
 
-	rcu_read_lock();
+	if (rht_grow_above_75(ht, tbl))
+		size *= 2;
+	/* More than two rehashes (not resizes) detected. */
+	else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
+		return -EBUSY;
 
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	hash = key_hashfn(ht, key, ht->p.key_len);
-restart:
-	rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
-		if (!compare(rht_obj(ht, he), arg))
-			continue;
-		rcu_read_unlock();
-		return rht_obj(ht, he);
-	}
+	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+	if (new_tbl == NULL)
+		return -ENOMEM;
 
-	if (unlikely(tbl != old_tbl)) {
-		tbl = old_tbl;
-		goto restart;
-	}
-	rcu_read_unlock();
+	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+	if (err) {
+		bucket_table_free(new_tbl);
+		if (err == -EEXIST)
+			err = 0;
+	} else
+		schedule_work(&ht->run_work);
 
-	return NULL;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
+EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
 
-/**
- * rhashtable_lookup_insert - lookup and insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
+int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			   struct rhash_head *obj,
+			   struct bucket_table *tbl)
 {
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = rht_obj(ht, obj) + ht->p.key_offset,
-	};
+	struct rhash_head *head;
+	unsigned int hash;
+	int err;
 
-	BUG_ON(!ht->p.key_len);
+	tbl = rhashtable_last_table(ht, tbl);
+	hash = head_hashfn(ht, tbl, obj);
+	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
-	return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
-						&arg);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
+	err = -EEXIST;
+	if (key && rhashtable_lookup_fast(ht, key, ht->p))
+		goto exit;
 
-/**
- * rhashtable_lookup_compare_insert - search and insert object to hash table
- *                                    with compare function
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
-				      struct rhash_head *obj,
-				      bool (*compare)(void *, void *),
-				      void *arg)
-{
-	struct bucket_table *new_tbl, *old_tbl;
-	u32 new_hash;
-	bool success = true;
+	err = -EAGAIN;
+	if (rhashtable_check_elasticity(ht, tbl, hash) ||
+	    rht_grow_above_100(ht, tbl))
+		goto exit;
 
-	BUG_ON(!ht->p.key_len);
+	err = 0;
 
-	rcu_read_lock();
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
-	lock_buckets(new_tbl, old_tbl, new_hash);
+	RCU_INIT_POINTER(obj->next, head);
 
-	if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
-				      compare, arg)) {
-		success = false;
-		goto exit;
-	}
+	rcu_assign_pointer(tbl->buckets[hash], obj);
 
-	__rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
+	atomic_inc(&ht->nelems);
 
 exit:
-	unlock_buckets(new_tbl, old_tbl, new_hash);
-	rcu_read_unlock();
+	spin_unlock(rht_bucket_lock(tbl, hash));
 
-	return success;
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
+EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 
 /**
  * rhashtable_walk_init - Initialise an iterator
@@ -887,11 +495,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&iter->walker->list);
-	iter->walker->resize = false;
-
 	mutex_lock(&ht->mutex);
-	list_add(&iter->walker->list, &ht->walkers);
+	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
 	mutex_unlock(&ht->mutex);
 
 	return 0;
@@ -907,7 +513,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	mutex_lock(&iter->ht->mutex);
-	list_del(&iter->walker->list);
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
 	mutex_unlock(&iter->ht->mutex);
 	kfree(iter->walker);
 }
@@ -928,13 +535,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * by calling rhashtable_walk_next.
  */
 int rhashtable_walk_start(struct rhashtable_iter *iter)
+	__acquires(RCU)
 {
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
+
 	rcu_read_lock();
 
-	if (iter->walker->resize) {
-		iter->slot = 0;
-		iter->skip = 0;
-		iter->walker->resize = false;
+	mutex_unlock(&ht->mutex);
+
+	if (!iter->walker->tbl) {
+		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -956,13 +571,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl = iter->walker->tbl;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
 	void *obj = NULL;
 
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
 	if (p) {
 		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
 		goto next;
@@ -988,17 +601,20 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->p = NULL;
+	/* Ensure we see any new tables. */
+	smp_rmb();
 
-out:
-	if (iter->walker->resize) {
-		iter->p = NULL;
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
-		iter->walker->resize = false;
 		return ERR_PTR(-EAGAIN);
 	}
 
+	iter->p = NULL;
+
+out:
+
 	return obj;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -1010,16 +626,39 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  * Finish a hash table walk.
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
+	__releases(RCU)
 {
-	rcu_read_unlock();
+	struct rhashtable *ht;
+	struct bucket_table *tbl = iter->walker->tbl;
+
+	if (!tbl)
+		goto out;
+
+	ht = iter->ht;
+
+	spin_lock(&ht->lock);
+	if (tbl->rehash < tbl->size)
+		list_add(&iter->walker->list, &tbl->walkers);
+	else
+		iter->walker->tbl = NULL;
+	spin_unlock(&ht->lock);
+
 	iter->p = NULL;
+
+out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
-static size_t rounded_hashtable_size(struct rhashtable_params *params)
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   1UL << params->min_shift);
+		   (unsigned long)params->min_size);
+}
+
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+	return jhash2(key, length, seed);
 }
 
 /**
@@ -1052,7 +691,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
  *	struct rhash_head	node;
  * };
  *
- * u32 my_hash_fn(const void *data, u32 seed)
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
  * {
  *	struct test_obj *obj = data;
  *
@@ -1065,47 +704,74 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
  *	.obj_hashfn = my_hash_fn,
  * };
  */
-int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
 
 	size = HASH_DEFAULT_SIZE;
 
-	if ((params->key_len && !params->hashfn) ||
-	    (!params->key_len && !params->obj_hashfn))
+	if ((!params->key_len && !params->obj_hashfn) ||
+	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
 		return -EINVAL;
 
-	params->min_shift = max_t(size_t, params->min_shift,
-				  ilog2(HASH_MIN_SIZE));
-
 	if (params->nelem_hint)
 		size = rounded_hashtable_size(params);
 
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
+	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
-	INIT_LIST_HEAD(&ht->walkers);
+
+	if (params->min_size)
+		ht->p.min_size = roundup_pow_of_two(params->min_size);
+
+	if (params->max_size)
+		ht->p.max_size = rounddown_pow_of_two(params->max_size);
+
+	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
+
+	/* The maximum (not average) chain length grows with the
+	 * size of the hash table, at a rate of (log N)/(log log N).
+	 * The value of 16 is selected so that even if the hash
+	 * table grew to 2^32 you would not expect the maximum
+	 * chain length to exceed it unless we are under attack
+	 * (or extremely unlucky).
+	 *
+	 * As this limit is only to detect attacks, we don't need
+	 * to set it to a lower value as you'd need the chain
+	 * length to vastly exceed 16 to have any real effect
+	 * on the system.
+	 */
+	if (!params->insecure_elasticity)
+		ht->elasticity = 16;
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
-	tbl = bucket_table_alloc(ht, size);
+	ht->key_len = ht->p.key_len;
+	if (!params->hashfn) {
+		ht->p.hashfn = jhash;
+
+		if (!(ht->key_len & (sizeof(u32) - 1))) {
+			ht->key_len /= sizeof(u32);
+			ht->p.hashfn = rhashtable_jhash2;
+		}
+	}
+
+	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
 
 	atomic_set(&ht->nelems, 0);
-	atomic_set(&ht->shift, ilog2(tbl->size));
-	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
-	if (!ht->p.hash_rnd)
-		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
+	RCU_INIT_POINTER(ht->tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
@@ -1114,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht:		the hash table to destroy
+ * @free_fn:	callback to release resources of element
+ * @arg:	pointer passed to free_fn
+ *
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to releasal resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occurs in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
diff --git a/lib/sha1.c b/lib/sha1.c
index 1df191e04a24..5a56dfd7b99d 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -198,3 +198,4 @@ void sha_init(__u32 *buf)
 	buf[3] = 0x10325476;
 	buf[4] = 0xc3d2e1f0;
 }
+EXPORT_SYMBOL(sha_init);
diff --git a/lib/string.c b/lib/string.c
index ce81aaec3839..a5792019193c 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset);
 void memzero_explicit(void *s, size_t count)
 {
 	memset(s, 0, count);
-	OPTIMIZER_HIDE_VAR(s);
+	barrier();
 }
 EXPORT_SYMBOL(memzero_explicit);
 
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 8f8c4417f228..c98ae818eb4e 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -4,6 +4,7 @@
  * Copyright 31 August 2008 James Bottomley
  * Copyright (C) 2013, Intel Corporation
  */
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
 #include <linux/export.h>
@@ -14,7 +15,8 @@
 
 /**
  * string_get_size - get the size in the specified units
- * @size:	The size to be converted
+ * @size:	The size to be converted in blocks
+ * @blk_size:	Size of the block (use 1 for size in bytes)
  * @units:	units to use (powers of 1000 or 1024)
  * @buf:	buffer to format to
  * @len:	length of buffer
@@ -24,14 +26,14 @@
  * at least 9 bytes and will always be zero terminated.
  *
  */
-void string_get_size(u64 size, const enum string_size_units units,
+void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
 		     char *buf, int len)
 {
 	static const char *const units_10[] = {
-		"B", "kB", "MB", "GB", "TB", "PB", "EB"
+		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
 	};
 	static const char *const units_2[] = {
-		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
+		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
 	};
 	static const char *const *const units_str[] = {
 		[STRING_UNITS_10] = units_10,
@@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units,
 		[STRING_UNITS_2] = 1024,
 	};
 	int i, j;
-	u32 remainder = 0, sf_cap;
+	u32 remainder = 0, sf_cap, exp;
 	char tmp[8];
+	const char *unit;
 
 	tmp[0] = '\0';
 	i = 0;
-	if (size >= divisor[units]) {
-		while (size >= divisor[units]) {
-			remainder = do_div(size, divisor[units]);
-			i++;
-		}
+	if (!size)
+		goto out;
 
-		sf_cap = size;
-		for (j = 0; sf_cap*10 < 1000; j++)
-			sf_cap *= 10;
+	while (blk_size >= divisor[units]) {
+		remainder = do_div(blk_size, divisor[units]);
+		i++;
+	}
 
-		if (j) {
-			remainder *= 1000;
-			remainder /= divisor[units];
-			snprintf(tmp, sizeof(tmp), ".%03u", remainder);
-			tmp[j+1] = '\0';
-		}
+	exp = divisor[units] / (u32)blk_size;
+	if (size >= exp) {
+		remainder = do_div(size, divisor[units]);
+		remainder *= blk_size;
+		i++;
+	} else {
+		remainder *= size;
+	}
+
+	size *= blk_size;
+	size += remainder / divisor[units];
+	remainder %= divisor[units];
+
+	while (size >= divisor[units]) {
+		remainder = do_div(size, divisor[units]);
+		i++;
 	}
 
+	sf_cap = size;
+	for (j = 0; sf_cap*10 < 1000; j++)
+		sf_cap *= 10;
+
+	if (j) {
+		remainder *= 1000;
+		remainder /= divisor[units];
+		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+		tmp[j+1] = '\0';
+	}
+
+ out:
+	if (i >= ARRAY_SIZE(units_2))
+		unit = "UNK";
+	else
+		unit = units_str[units][i];
+
 	snprintf(buf, len, "%u%s %s", (u32)size,
-		 tmp, units_str[units][i]);
+		 tmp, unit);
 }
 EXPORT_SYMBOL(string_get_size);
 
@@ -239,29 +267,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
 }
 EXPORT_SYMBOL(string_unescape);
 
-static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
+static bool escape_passthrough(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 1)
-		return -ENOMEM;
-
-	*out++ = c;
-
-	*dst = out;
-	*osz -= 1;
-
-	return 1;
+	if (out < end)
+		*out = c;
+	*dst = out + 1;
+	return true;
 }
 
-static int escape_space(unsigned char c, char **dst, size_t *osz)
+static bool escape_space(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 	unsigned char to;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	switch (c) {
 	case '\n':
 		to = 'n';
@@ -279,26 +299,25 @@ static int escape_space(unsigned char c, char **dst, size_t *osz)
 		to = 'f';
 		break;
 	default:
-		return 0;
+		return false;
 	}
 
-	*out++ = '\\';
-	*out++ = to;
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = to;
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
 
-static int escape_special(unsigned char c, char **dst, size_t *osz)
+static bool escape_special(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 	unsigned char to;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	switch (c) {
 	case '\\':
 		to = '\\';
@@ -310,71 +329,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz)
 		to = 'e';
 		break;
 	default:
-		return 0;
+		return false;
 	}
 
-	*out++ = '\\';
-	*out++ = to;
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = to;
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
 
-static int escape_null(unsigned char c, char **dst, size_t *osz)
+static bool escape_null(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 2)
-		return -ENOMEM;
-
 	if (c)
-		return 0;
+		return false;
 
-	*out++ = '\\';
-	*out++ = '0';
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = '0';
+	++out;
 
 	*dst = out;
-	*osz -= 2;
-
-	return 1;
+	return true;
 }
 
-static int escape_octal(unsigned char c, char **dst, size_t *osz)
+static bool escape_octal(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 4)
-		return -ENOMEM;
-
-	*out++ = '\\';
-	*out++ = ((c >> 6) & 0x07) + '0';
-	*out++ = ((c >> 3) & 0x07) + '0';
-	*out++ = ((c >> 0) & 0x07) + '0';
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = ((c >> 6) & 0x07) + '0';
+	++out;
+	if (out < end)
+		*out = ((c >> 3) & 0x07) + '0';
+	++out;
+	if (out < end)
+		*out = ((c >> 0) & 0x07) + '0';
+	++out;
 
 	*dst = out;
-	*osz -= 4;
-
-	return 1;
+	return true;
 }
 
-static int escape_hex(unsigned char c, char **dst, size_t *osz)
+static bool escape_hex(unsigned char c, char **dst, char *end)
 {
 	char *out = *dst;
 
-	if (*osz < 4)
-		return -ENOMEM;
-
-	*out++ = '\\';
-	*out++ = 'x';
-	*out++ = hex_asc_hi(c);
-	*out++ = hex_asc_lo(c);
+	if (out < end)
+		*out = '\\';
+	++out;
+	if (out < end)
+		*out = 'x';
+	++out;
+	if (out < end)
+		*out = hex_asc_hi(c);
+	++out;
+	if (out < end)
+		*out = hex_asc_lo(c);
+	++out;
 
 	*dst = out;
-	*osz -= 4;
-
-	return 1;
+	return true;
 }
 
 /**
@@ -426,19 +452,17 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz)
  * it if needs.
  *
  * Return:
- * The amount of the characters processed to the destination buffer, or
- * %-ENOMEM if the size of buffer is not enough to put an escaped character is
- * returned.
- *
- * Even in the case of error @dst pointer will be updated to point to the byte
- * after the last processed character.
+ * The total size of the escaped output that would be generated for
+ * the given input and flags. To check whether the output was
+ * truncated, compare the return value to osz. There is room left in
+ * dst for a '\0' terminator if and only if ret < osz.
  */
-int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
+int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 		      unsigned int flags, const char *esc)
 {
-	char *out = *dst, *p = out;
+	char *p = dst;
+	char *end = p + osz;
 	bool is_dict = esc && *esc;
-	int ret = 0;
 
 	while (isz--) {
 		unsigned char c = *src++;
@@ -458,55 +482,26 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
 		    (is_dict && !strchr(esc, c))) {
 			/* do nothing */
 		} else {
-			if (flags & ESCAPE_SPACE) {
-				ret = escape_space(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
-
-			if (flags & ESCAPE_SPECIAL) {
-				ret = escape_special(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
-
-			if (flags & ESCAPE_NULL) {
-				ret = escape_null(c, &p, &osz);
-				if (ret < 0)
-					break;
-				if (ret > 0)
-					continue;
-			}
+			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
+				continue;
+
+			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
+				continue;
+
+			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
+				continue;
 
 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
-			if (flags & ESCAPE_OCTAL) {
-				ret = escape_octal(c, &p, &osz);
-				if (ret < 0)
-					break;
+			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
 				continue;
-			}
-			if (flags & ESCAPE_HEX) {
-				ret = escape_hex(c, &p, &osz);
-				if (ret < 0)
-					break;
+
+			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
 				continue;
-			}
 		}
 
-		ret = escape_passthrough(c, &p, &osz);
-		if (ret < 0)
-			break;
+		escape_passthrough(c, &p, end);
 	}
 
-	*dst = p;
-
-	if (ret < 0)
-		return ret;
-
-	return p - out;
+	return p - dst;
 }
 EXPORT_SYMBOL(string_escape_mem);
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
index daf29a390a89..c227cc43ec0a 100644
--- a/lib/test-hexdump.c
+++ b/lib/test-hexdump.c
@@ -18,26 +18,26 @@ static const unsigned char data_b[] = {
 
 static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
 
-static const char *test_data_1_le[] __initconst = {
+static const char * const test_data_1_le[] __initconst = {
 	"be", "32", "db", "7b", "0a", "18", "93", "b2",
 	"70", "ba", "c4", "24", "7d", "83", "34", "9b",
 	"a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
 	"4c", "d1", "19", "99", "43", "b1", "af", "0c",
 };
 
-static const char *test_data_2_le[] __initconst = {
+static const char *test_data_2_le[] __initdata = {
 	"32be", "7bdb", "180a", "b293",
 	"ba70", "24c4", "837d", "9b34",
 	"9ca6", "ad31", "0f9c", "e9ac",
 	"d14c", "9919", "b143", "0caf",
 };
 
-static const char *test_data_4_le[] __initconst = {
+static const char *test_data_4_le[] __initdata = {
 	"7bdb32be", "b293180a", "24c4ba70", "9b34837d",
 	"ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
 };
 
-static const char *test_data_8_le[] __initconst = {
+static const char *test_data_8_le[] __initdata = {
 	"b293180a7bdb32be", "9b34837d24c4ba70",
 	"e9ac0f9cad319ca6", "0cafb1439919d14c",
 };
@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
 	char test[32 * 3 + 2 + 32 + 1];
 	char real[32 * 3 + 2 + 32 + 1];
 	char *p;
-	const char **result;
+	const char * const *result;
 	size_t l = len;
 	int gs = groupsize, rs = rowsize;
 	unsigned int i;
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index ab0d30e1e18f..8e376efd88a4 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2,
 	return NULL;
 }
 
+static __init void
+test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc,
+			    int q_test, const char *name)
+{
+	int q_real;
+
+	q_real = string_escape_mem(in, p, NULL, 0, flags, esc);
+	if (q_real != q_test)
+		pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n",
+			name, flags, q_test, q_real);
+}
+
 static __init void test_string_escape(const char *name,
 				      const struct test_string_2 *s2,
 				      unsigned int flags, const char *esc)
 {
-	int q_real = 512;
-	char *out_test = kmalloc(q_real, GFP_KERNEL);
-	char *out_real = kmalloc(q_real, GFP_KERNEL);
+	size_t out_size = 512;
+	char *out_test = kmalloc(out_size, GFP_KERNEL);
+	char *out_real = kmalloc(out_size, GFP_KERNEL);
 	char *in = kmalloc(256, GFP_KERNEL);
-	char *buf = out_real;
 	int p = 0, q_test = 0;
+	int q_real;
 
 	if (!out_test || !out_real || !in)
 		goto out;
@@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name,
 		q_test += len;
 	}
 
-	q_real = string_escape_mem(in, p, &buf, q_real, flags, esc);
+	q_real = string_escape_mem(in, p, out_real, out_size, flags, esc);
 
 	test_string_check_buf(name, flags, in, p, out_real, q_real, out_test,
 			      q_test);
+
+	test_string_escape_overflow(in, p, flags, esc, q_test, name);
+
 out:
 	kfree(in);
 	kfree(out_real);
 	kfree(out_test);
 }
 
-static __init void test_string_escape_nomem(void)
-{
-	char *in = "\eb \\C\007\"\x90\r]";
-	char out[64], *buf = out;
-	int rc = -ENOMEM, ret;
-
-	ret = string_escape_str_any_np(in, &buf, strlen(in), NULL);
-	if (ret == rc)
-		return;
-
-	pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc);
-}
-
 static int __init test_string_helpers_init(void)
 {
 	unsigned int i;
@@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void)
 	for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++)
 		test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1);
 
-	test_string_escape_nomem();
-
 	return -EINVAL;
 }
 module_init(test_string_helpers_init);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593d1dd6..b2957540d3c7 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,6 +38,15 @@ struct test_obj {
 	struct rhash_head	node;
 };
 
+static const struct rhashtable_params test_rht_params = {
+	.nelem_hint = TEST_HT_SIZE,
+	.head_offset = offsetof(struct test_obj, node),
+	.key_offset = offsetof(struct test_obj, value),
+	.key_len = sizeof(int),
+	.hashfn = jhash,
+	.nulls_base = (3U << RHT_BASE_SHIFT),
+};
+
 static int __init test_rht_lookup(struct rhashtable *ht)
 {
 	unsigned int i;
@@ -47,7 +56,7 @@ static int __init test_rht_lookup(struct rhashtable *ht)
 		bool expected = !(i % 2);
 		u32 key = i;
 
-		obj = rhashtable_lookup(ht, &key);
+		obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 
 		if (expected && !obj) {
 			pr_warn("Test failed: Could not find key %u\n", key);
@@ -80,7 +89,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
 		rcu_cnt = cnt = 0;
 
 		if (!quiet)
-			pr_info(" [%#4x/%zu]", i, tbl->size);
+			pr_info(" [%#4x/%u]", i, tbl->size);
 
 		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
 			cnt++;
@@ -133,7 +142,11 @@ static int __init test_rhashtable(struct rhashtable *ht)
 		obj->ptr = TEST_PTR;
 		obj->value = i * 2;
 
-		rhashtable_insert(ht, &obj->node);
+		err = rhashtable_insert_fast(ht, &obj->node, test_rht_params);
+		if (err) {
+			kfree(obj);
+			goto error;
+		}
 	}
 
 	rcu_read_lock();
@@ -141,30 +154,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
 	test_rht_lookup(ht);
 	rcu_read_unlock();
 
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table expansion iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_expand(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table shrinkage iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_shrink(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
 	rcu_read_lock();
 	test_bucket_stats(ht, true);
 	rcu_read_unlock();
@@ -173,10 +162,10 @@ static int __init test_rhashtable(struct rhashtable *ht)
 	for (i = 0; i < TEST_ENTRIES; i++) {
 		u32 key = i * 2;
 
-		obj = rhashtable_lookup(ht, &key);
+		obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
 		BUG_ON(!obj);
 
-		rhashtable_remove(ht, &obj->node);
+		rhashtable_remove_fast(ht, &obj->node, test_rht_params);
 		kfree(obj);
 	}
 
@@ -195,20 +184,11 @@ static struct rhashtable ht;
 
 static int __init test_rht_init(void)
 {
-	struct rhashtable_params params = {
-		.nelem_hint = TEST_HT_SIZE,
-		.head_offset = offsetof(struct test_obj, node),
-		.key_offset = offsetof(struct test_obj, value),
-		.key_len = sizeof(int),
-		.hashfn = jhash,
-		.max_shift = 1, /* we expand/shrink manually here */
-		.nulls_base = (3U << RHT_BASE_SHIFT),
-	};
 	int err;
 
 	pr_info("Running resizable hashtable tests...\n");
 
-	err = rhashtable_init(&ht, &params);
+	err = rhashtable_init(&ht, &test_rht_params);
 	if (err < 0) {
 		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
 			err);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b235c96167d3..da39c608a28c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,6 +17,7 @@
  */
 
 #include <stdarg.h>
+#include <linux/clk-provider.h>
 #include <linux/module.h>	/* for KSYM_SYMBOL_LEN */
 #include <linux/types.h>
 #include <linux/string.h>
@@ -32,6 +33,7 @@
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/sections.h>	/* for dereference_function_descriptor() */
+#include <asm/byteorder.h>	/* cpu_to_le16 */
 
 #include <linux/string_helpers.h>
 #include "kstrtox.h"
@@ -121,142 +123,145 @@ int skip_atoi(const char **s)
 	return i;
 }
 
-/* Decimal conversion is by far the most typical, and is used
- * for /proc and /sys data. This directly impacts e.g. top performance
- * with many processes running. We optimize it for speed
- * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
- * (with permission from the author, Douglas W. Jones).
+/*
+ * Decimal conversion is by far the most typical, and is used for
+ * /proc and /sys data. This directly impacts e.g. top performance
+ * with many processes running. We optimize it for speed by emitting
+ * two characters at a time, using a 200 byte lookup table. This
+ * roughly halves the number of multiplications compared to computing
+ * the digits one at a time. Implementation strongly inspired by the
+ * previous version, which in turn used ideas described at
+ * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
+ * from the author, Douglas W. Jones).
+ *
+ * It turns out there is precisely one 26 bit fixed-point
+ * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
+ * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
+ * range happens to be somewhat larger (x <= 1073741898), but that's
+ * irrelevant for our purpose.
+ *
+ * For dividing a number in the range [10^4, 10^6-1] by 100, we still
+ * need a 32x32->64 bit multiply, so we simply use the same constant.
+ *
+ * For dividing a number in the range [100, 10^4-1] by 100, there are
+ * several options. The simplest is (x * 0x147b) >> 19, which is valid
+ * for all x <= 43698.
  */
 
-#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
-/* Formats correctly any integer in [0, 999999999] */
+static const u16 decpair[100] = {
+#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
+	_( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
+	_(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
+	_(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
+	_(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
+	_(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
+	_(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
+	_(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
+	_(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
+	_(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
+	_(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
+#undef _
+};
+
+/*
+ * This will print a single '0' even if r == 0, since we would
+ * immediately jump to out_r where two 0s would be written but only
+ * one of them accounted for in buf. This is needed by ip4_string
+ * below. All other callers pass a non-zero value of r.
+*/
 static noinline_for_stack
-char *put_dec_full9(char *buf, unsigned q)
+char *put_dec_trunc8(char *buf, unsigned r)
 {
-	unsigned r;
+	unsigned q;
 
-	/*
-	 * Possible ways to approx. divide by 10
-	 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit)
-	 * (x * 0xcccd) >> 19     x <      81920 (x < 262149 when 64-bit mul)
-	 * (x * 0x6667) >> 18     x <      43699
-	 * (x * 0x3334) >> 17     x <      16389
-	 * (x * 0x199a) >> 16     x <      16389
-	 * (x * 0x0ccd) >> 15     x <      16389
-	 * (x * 0x0667) >> 14     x <       2739
-	 * (x * 0x0334) >> 13     x <       1029
-	 * (x * 0x019a) >> 12     x <       1029
-	 * (x * 0x00cd) >> 11     x <       1029 shorter code than * 0x67 (on i386)
-	 * (x * 0x0067) >> 10     x <        179
-	 * (x * 0x0034) >>  9     x <         69 same
-	 * (x * 0x001a) >>  8     x <         69 same
-	 * (x * 0x000d) >>  7     x <         69 same, shortest code (on i386)
-	 * (x * 0x0007) >>  6     x <         19
-	 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
-	 */
-	r      = (q * (uint64_t)0x1999999a) >> 32;
-	*buf++ = (q - 10 * r) + '0'; /* 1 */
-	q      = (r * (uint64_t)0x1999999a) >> 32;
-	*buf++ = (r - 10 * q) + '0'; /* 2 */
-	r      = (q * (uint64_t)0x1999999a) >> 32;
-	*buf++ = (q - 10 * r) + '0'; /* 3 */
-	q      = (r * (uint64_t)0x1999999a) >> 32;
-	*buf++ = (r - 10 * q) + '0'; /* 4 */
-	r      = (q * (uint64_t)0x1999999a) >> 32;
-	*buf++ = (q - 10 * r) + '0'; /* 5 */
-	/* Now value is under 10000, can avoid 64-bit multiply */
-	q      = (r * 0x199a) >> 16;
-	*buf++ = (r - 10 * q)  + '0'; /* 6 */
-	r      = (q * 0xcd) >> 11;
-	*buf++ = (q - 10 * r)  + '0'; /* 7 */
-	q      = (r * 0xcd) >> 11;
-	*buf++ = (r - 10 * q) + '0'; /* 8 */
-	*buf++ = q + '0'; /* 9 */
+	/* 1 <= r < 10^8 */
+	if (r < 100)
+		goto out_r;
+
+	/* 100 <= r < 10^8 */
+	q = (r * (u64)0x28f5c29) >> 32;
+	*((u16 *)buf) = decpair[r - 100*q];
+	buf += 2;
+
+	/* 1 <= q < 10^6 */
+	if (q < 100)
+		goto out_q;
+
+	/*  100 <= q < 10^6 */
+	r = (q * (u64)0x28f5c29) >> 32;
+	*((u16 *)buf) = decpair[q - 100*r];
+	buf += 2;
+
+	/* 1 <= r < 10^4 */
+	if (r < 100)
+		goto out_r;
+
+	/* 100 <= r < 10^4 */
+	q = (r * 0x147b) >> 19;
+	*((u16 *)buf) = decpair[r - 100*q];
+	buf += 2;
+out_q:
+	/* 1 <= q < 100 */
+	r = q;
+out_r:
+	/* 1 <= r < 100 */
+	*((u16 *)buf) = decpair[r];
+	buf += r < 10 ? 1 : 2;
 	return buf;
 }
-#endif
 
-/* Similar to above but do not pad with zeros.
- * Code can be easily arranged to print 9 digits too, but our callers
- * always call put_dec_full9() instead when the number has 9 decimal digits.
- */
+#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
 static noinline_for_stack
-char *put_dec_trunc8(char *buf, unsigned r)
+char *put_dec_full8(char *buf, unsigned r)
 {
 	unsigned q;
 
-	/* Copy of previous function's body with added early returns */
-	while (r >= 10000) {
-		q = r + '0';
-		r  = (r * (uint64_t)0x1999999a) >> 32;
-		*buf++ = q - 10*r;
-	}
-
-	q      = (r * 0x199a) >> 16;	/* r <= 9999 */
-	*buf++ = (r - 10 * q)  + '0';
-	if (q == 0)
-		return buf;
-	r      = (q * 0xcd) >> 11;	/* q <= 999 */
-	*buf++ = (q - 10 * r)  + '0';
-	if (r == 0)
-		return buf;
-	q      = (r * 0xcd) >> 11;	/* r <= 99 */
-	*buf++ = (r - 10 * q) + '0';
-	if (q == 0)
-		return buf;
-	*buf++ = q + '0';		 /* q <= 9 */
-	return buf;
-}
+	/* 0 <= r < 10^8 */
+	q = (r * (u64)0x28f5c29) >> 32;
+	*((u16 *)buf) = decpair[r - 100*q];
+	buf += 2;
 
-/* There are two algorithms to print larger numbers.
- * One is generic: divide by 1000000000 and repeatedly print
- * groups of (up to) 9 digits. It's conceptually simple,
- * but requires a (unsigned long long) / 1000000000 division.
- *
- * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
- * manipulates them cleverly and generates groups of 4 decimal digits.
- * It so happens that it does NOT require long long division.
- *
- * If long is > 32 bits, division of 64-bit values is relatively easy,
- * and we will use the first algorithm.
- * If long long is > 64 bits (strange architecture with VERY large long long),
- * second algorithm can't be used, and we again use the first one.
- *
- * Else (if long is 32 bits and long long is 64 bits) we use second one.
- */
+	/* 0 <= q < 10^6 */
+	r = (q * (u64)0x28f5c29) >> 32;
+	*((u16 *)buf) = decpair[q - 100*r];
+	buf += 2;
 
-#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
+	/* 0 <= r < 10^4 */
+	q = (r * 0x147b) >> 19;
+	*((u16 *)buf) = decpair[r - 100*q];
+	buf += 2;
 
-/* First algorithm: generic */
+	/* 0 <= q < 100 */
+	*((u16 *)buf) = decpair[q];
+	buf += 2;
+	return buf;
+}
 
-static
+static noinline_for_stack
 char *put_dec(char *buf, unsigned long long n)
 {
-	if (n >= 100*1000*1000) {
-		while (n >= 1000*1000*1000)
-			buf = put_dec_full9(buf, do_div(n, 1000*1000*1000));
-		if (n >= 100*1000*1000)
-			return put_dec_full9(buf, n);
-	}
+	if (n >= 100*1000*1000)
+		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
+	/* 1 <= n <= 1.6e11 */
+	if (n >= 100*1000*1000)
+		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
+	/* 1 <= n < 1e8 */
 	return put_dec_trunc8(buf, n);
 }
 
-#else
-
-/* Second algorithm: valid only for 64-bit long longs */
+#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
 
-/* See comment in put_dec_full9 for choice of constants */
-static noinline_for_stack
-void put_dec_full4(char *buf, unsigned q)
+static void
+put_dec_full4(char *buf, unsigned r)
 {
-	unsigned r;
-	r      = (q * 0xccd) >> 15;
-	buf[0] = (q - 10 * r) + '0';
-	q      = (r * 0xcd) >> 11;
-	buf[1] = (r - 10 * q)  + '0';
-	r      = (q * 0xcd) >> 11;
-	buf[2] = (q - 10 * r)  + '0';
-	buf[3] = r + '0';
+	unsigned q;
+
+	/* 0 <= r < 10^4 */
+	q = (r * 0x147b) >> 19;
+	*((u16 *)buf) = decpair[r - 100*q];
+	buf += 2;
+	/* 0 <= q < 100 */
+	*((u16 *)buf) = decpair[q];
 }
 
 /*
@@ -264,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
  * The approximation x/10000 == (x * 0x346DC5D7) >> 43
  * holds for all x < 1,128,869,999.  The largest value this
  * helper will ever be asked to convert is 1,125,520,955.
- * (d1 in the put_dec code, assuming n is all-ones).
+ * (second call in the put_dec code, assuming n is all-ones).
  */
-static
+static noinline_for_stack
 unsigned put_dec_helper4(char *buf, unsigned x)
 {
         uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -293,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
 	d2  = (h      ) & 0xffff;
 	d3  = (h >> 16); /* implicit "& 0xffff" */
 
+	/* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
+	     = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
 	q   = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
 	q = put_dec_helper4(buf, q);
 
@@ -322,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
  */
 int num_to_str(char *buf, int size, unsigned long long num)
 {
-	char tmp[sizeof(num) * 3];
+	/* put_dec requires 2-byte alignment of the buffer. */
+	char tmp[sizeof(num) * 3] __aligned(2);
 	int idx, len;
 
 	/* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
@@ -340,11 +348,11 @@ int num_to_str(char *buf, int size, unsigned long long num)
 	return len;
 }
 
-#define ZEROPAD	1		/* pad with zero */
-#define SIGN	2		/* unsigned/signed long */
+#define SIGN	1		/* unsigned/signed, must be 1 */
+#define LEFT	2		/* left justified */
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
-#define LEFT	16		/* left justified */
+#define ZEROPAD	16		/* pad with zero, must be 16 == '0' - ' ' */
 #define SMALL	32		/* use lowercase in hex (must be 32 == 0x20) */
 #define SPECIAL	64		/* prefix hex with "0x", octal with "0" */
 
@@ -383,10 +391,8 @@ static noinline_for_stack
 char *number(char *buf, char *end, unsigned long long num,
 	     struct printf_spec spec)
 {
-	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
-	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
-
-	char tmp[66];
+	/* put_dec requires 2-byte alignment of the buffer. */
+	char tmp[3 * sizeof(num)] __aligned(2);
 	char sign;
 	char locase;
 	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -422,12 +428,7 @@ char *number(char *buf, char *end, unsigned long long num,
 	/* generate full string in tmp[], in reverse order */
 	i = 0;
 	if (num < spec.base)
-		tmp[i++] = digits[num] | locase;
-	/* Generic code, for any base:
-	else do {
-		tmp[i++] = (digits[do_div(num,base)] | locase);
-	} while (num != 0);
-	*/
+		tmp[i++] = hex_asc_upper[num] | locase;
 	else if (spec.base != 10) { /* 8 or 16 */
 		int mask = spec.base - 1;
 		int shift = 3;
@@ -435,7 +436,7 @@ char *number(char *buf, char *end, unsigned long long num,
 		if (spec.base == 16)
 			shift = 4;
 		do {
-			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
+			tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase);
 			num >>= shift;
 		} while (num);
 	} else { /* base 10 */
@@ -447,7 +448,7 @@ char *number(char *buf, char *end, unsigned long long num,
 		spec.precision = i;
 	/* leading space padding */
 	spec.field_width -= spec.precision;
-	if (!(spec.flags & (ZEROPAD+LEFT))) {
+	if (!(spec.flags & (ZEROPAD | LEFT))) {
 		while (--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = ' ';
@@ -475,7 +476,8 @@ char *number(char *buf, char *end, unsigned long long num,
 	}
 	/* zero or space padding */
 	if (!(spec.flags & LEFT)) {
-		char c = (spec.flags & ZEROPAD) ? '0' : ' ';
+		char c = ' ' + (spec.flags & ZEROPAD);
+		BUILD_BUG_ON(' ' + ZEROPAD != '0');
 		while (--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = c;
@@ -783,11 +785,19 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
 	if (spec.field_width > 0)
 		len = min_t(int, spec.field_width, 64);
 
-	for (i = 0; i < len && buf < end - 1; i++) {
-		buf = hex_byte_pack(buf, addr[i]);
+	for (i = 0; i < len; ++i) {
+		if (buf < end)
+			*buf = hex_asc_hi(addr[i]);
+		++buf;
+		if (buf < end)
+			*buf = hex_asc_lo(addr[i]);
+		++buf;
 
-		if (buf < end && separator && i != len - 1)
-			*buf++ = separator;
+		if (separator && i != len - 1) {
+			if (buf < end)
+				*buf = separator;
+			++buf;
+		}
 	}
 
 	return buf;
@@ -942,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
 		break;
 	}
 	for (i = 0; i < 4; i++) {
-		char temp[3];	/* hold each IP quad in reverse order */
+		char temp[4] __aligned(2);	/* hold each IP quad in reverse order */
 		int digits = put_dec_trunc8(temp, addr[index]) - temp;
 		if (leading_zeros) {
 			if (digits < 3)
@@ -1233,8 +1243,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
 
 	len = spec.field_width < 0 ? 1 : spec.field_width;
 
-	/* Ignore the error. We print as many characters as we can */
-	string_escape_mem(addr, len, &buf, end - buf, flags, NULL);
+	/*
+	 * string_escape_mem() writes as many characters as it can to
+	 * the given buffer, and returns the total size of the output
+	 * had the buffer been big enough.
+	 */
+	buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL);
 
 	return buf;
 }
@@ -1322,6 +1336,30 @@ char *address_val(char *buf, char *end, const void *addr,
 	return number(buf, end, num, spec);
 }
 
+static noinline_for_stack
+char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
+	    const char *fmt)
+{
+	if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk)
+		return string(buf, end, NULL, spec);
+
+	switch (fmt[1]) {
+	case 'r':
+		return number(buf, end, clk_get_rate(clk), spec);
+
+	case 'n':
+	default:
+#ifdef CONFIG_COMMON_CLK
+		return string(buf, end, __clk_get_name(clk), spec);
+#else
+		spec.base = 16;
+		spec.field_width = sizeof(unsigned long) * 2 + 2;
+		spec.flags |= SPECIAL | SMALL | ZEROPAD;
+		return number(buf, end, (unsigned long)clk, spec);
+#endif
+	}
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1404,6 +1442,11 @@ int kptr_restrict __read_mostly;
  *           (default assumed to be phys_addr_t, passed by reference)
  * - 'd[234]' For a dentry name (optionally 2-4 last components)
  * - 'D[234]' Same as 'd' but for a struct file
+ * - 'C' For a clock, it prints the name (Common Clock Framework) or address
+ *       (legacy clock framework) of the clock
+ * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address
+ *        (legacy clock framework) of the clock
+ * - 'Cr' For a clock, it prints the current rate of the clock
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -1548,6 +1591,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		return address_val(buf, end, ptr, spec, fmt);
 	case 'd':
 		return dentry_name(buf, end, ptr, spec, fmt);
+	case 'C':
+		return clock(buf, end, ptr, spec, fmt);
 	case 'D':
 		return dentry_name(buf, end,
 				   ((const struct file *)ptr)->f_path.dentry,
@@ -1738,29 +1783,21 @@ qualifier:
 	if (spec->qualifier == 'L')
 		spec->type = FORMAT_TYPE_LONG_LONG;
 	else if (spec->qualifier == 'l') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_LONG;
-		else
-			spec->type = FORMAT_TYPE_ULONG;
+		BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG);
+		spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN);
 	} else if (_tolower(spec->qualifier) == 'z') {
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
 	} else if (spec->qualifier == 'H') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_BYTE;
-		else
-			spec->type = FORMAT_TYPE_UBYTE;
+		BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE);
+		spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN);
 	} else if (spec->qualifier == 'h') {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_SHORT;
-		else
-			spec->type = FORMAT_TYPE_USHORT;
+		BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT);
+		spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN);
 	} else {
-		if (spec->flags & SIGN)
-			spec->type = FORMAT_TYPE_INT;
-		else
-			spec->type = FORMAT_TYPE_UINT;
+		BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT);
+		spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN);
 	}
 
 	return ++fmt - start;
@@ -1800,6 +1837,11 @@ qualifier:
  * %*pE[achnops] print an escaped buffer
  * %*ph[CDN] a variable-length hex string with a separator (supports up to 64
  *           bytes of the input)
+ * %pC output the name (Common Clock Framework) or address (legacy clock
+ *     framework) of a clock
+ * %pCn output the name (Common Clock Framework) or address (legacy clock
+ *      framework) of a clock
+ * %pCr output the current rate of a clock
  * %n is ignored
  *
  * ** Please update Documentation/printk-formats.txt when making changes **
author	Dave Airlie <airlied@redhat.com>	2015-04-20 11:32:26 +1000
committer	Dave Airlie <airlied@redhat.com>	2015-04-20 13:05:20 +1000
commit	2c33ce009ca2389dbf0535d0672214d09738e35e (patch)
tree	6186a6458c3c160385d794a23eaf07c786a9e61b /lib
parent	cec32a47010647e8b0603726ebb75b990a4057a4 (diff)
parent	09d51602cf84a1264946711dd4ea0dddbac599a1 (diff)
download	linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.gz linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.bz2 linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.zip