13 files changed, 1320 insertions, 114 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 3a2ef67db6c7..a16555281d53 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -460,16 +460,6 @@ config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 config LRU_CACHE
 	tristate
 
-config AVERAGE
-	bool "Averaging functions"
-	help
-	  This option is provided for the case where no in-kernel-tree
-	  modules require averaging functions, but a module built outside
-	  the kernel tree does. Such modules that use library averaging
-	  functions require Y here.
-
-	  If unsure, say N.
-
 config CLZ_TAB
 	bool
 
@@ -521,6 +511,13 @@ config UCS2_STRING
 
 source "lib/fonts/Kconfig"
 
+config SG_SPLIT
+	def_bool n
+	help
+	 Provides a heler to split scatterlists into chunks, each chunk being a
+	 scatterlist. This should be selected by a driver or an API which
+	 whishes to split a scatterlist amongst multiple DMA channel.
+
 #
 # sg chaining option
 #
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d859305c556..ab76b99adc85 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1347,20 +1347,6 @@ config RCU_CPU_STALL_TIMEOUT
 	  RCU grace period persists, additional CPU stall warnings are
 	  printed at more widely spaced intervals.
 
-config RCU_CPU_STALL_INFO
-	bool "Print additional diagnostics on RCU CPU stall"
-	depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL
-	default y
-	help
-	  For each stalled CPU that is aware of the current RCU grace
-	  period, print out additional per-CPU diagnostic information
-	  regarding scheduling-clock ticks, idle state, and,
-	  for RCU_FAST_NO_HZ kernels, idle-entry state.
-
-	  Say N if you are unsure.
-
-	  Say Y if you want to enable such diagnostics.
-
 config RCU_TRACE
 	bool "Enable tracing for RCU"
 	depends on DEBUG_KERNEL
@@ -1373,7 +1359,7 @@ config RCU_TRACE
 	  Say N if you are unsure.
 
 config RCU_EQS_DEBUG
-	bool "Use this when adding any sort of NO_HZ support to your arch"
+	bool "Provide debugging asserts for adding NO_HZ support to an arch"
 	depends on DEBUG_KERNEL
 	help
 	  This option provides consistency checks in RCU's handling of
diff --git a/lib/Makefile b/lib/Makefile
index 9f2fc71a14a3..f01c558bf80d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -140,8 +140,6 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
 obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
 
-obj-$(CONFIG_AVERAGE) += average.o
-
 obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
@@ -162,6 +160,7 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
 
 obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
 
+obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
diff --git a/lib/average.c b/lib/average.c
deleted file mode 100644
index 114d1beae0c7..000000000000
--- a/lib/average.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * lib/average.c
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/export.h>
-#include <linux/average.h>
-#include <linux/kernel.h>
-#include <linux/bug.h>
-#include <linux/log2.h>
-
-/**
- * DOC: Exponentially Weighted Moving Average (EWMA)
- *
- * These are generic functions for calculating Exponentially Weighted Moving
- * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
- * up internal representation of the average value to prevent rounding errors.
- * The factor for scaling up and the exponential weight (or decay rate) have to
- * be specified thru the init fuction. The structure should not be accessed
- * directly but only thru the helper functions.
- */
-
-/**
- * ewma_init() - Initialize EWMA parameters
- * @avg: Average structure
- * @factor: Factor to use for the scaled up internal value. The maximum value
- *	of averages can be ULONG_MAX/(factor*weight). For performance reasons
- *	factor has to be a power of 2.
- * @weight: Exponential weight, or decay rate. This defines how fast the
- *	influence of older values decreases. For performance reasons weight has
- *	to be a power of 2.
- *
- * Initialize the EWMA parameters for a given struct ewma @avg.
- */
-void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
-{
-	WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
-
-	avg->weight = ilog2(weight);
-	avg->factor = ilog2(factor);
-	avg->internal = 0;
-}
-EXPORT_SYMBOL(ewma_init);
-
-/**
- * ewma_add() - Exponentially weighted moving average (EWMA)
- * @avg: Average structure
- * @val: Current value
- *
- * Add a sample to the average.
- */
-struct ewma *ewma_add(struct ewma *avg, unsigned long val)
-{
-	unsigned long internal = ACCESS_ONCE(avg->internal);
-
-	ACCESS_ONCE(avg->internal) = internal ?
-		(((internal << avg->weight) - internal) +
-			(val << avg->factor)) >> avg->weight :
-		(val << avg->factor);
-	return avg;
-}
-EXPORT_SYMBOL(ewma_add);
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
index df30632f0bef..ff19f66d3f7f 100644
--- a/lib/iommu-common.c
+++ b/lib/iommu-common.c
@@ -119,7 +119,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev,
 	unsigned long align_mask = 0;
 
 	if (align_order > 0)
-		align_mask = 0xffffffffffffffffl >> (64 - align_order);
+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
 
 	/* Sanity check */
 	if (unlikely(npages == 0)) {
diff --git a/lib/klist.c b/lib/klist.c
index 89b485a2a58d..d74cf7a29afd 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -324,6 +324,47 @@ static struct klist_node *to_klist_node(struct list_head *n)
 }
 
 /**
+ * klist_prev - Ante up prev node in list.
+ * @i: Iterator structure.
+ *
+ * First grab list lock. Decrement the reference count of the previous
+ * node, if there was one. Grab the prev node, increment its reference
+ * count, drop the lock, and return that prev node.
+ */
+struct klist_node *klist_prev(struct klist_iter *i)
+{
+	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *prev;
+
+	spin_lock(&i->i_klist->k_lock);
+
+	if (last) {
+		prev = to_klist_node(last->n_node.prev);
+		if (!klist_dec_and_del(last))
+			put = NULL;
+	} else
+		prev = to_klist_node(i->i_klist->k_list.prev);
+
+	i->i_cur = NULL;
+	while (prev != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(prev))) {
+			kref_get(&prev->n_ref);
+			i->i_cur = prev;
+			break;
+		}
+		prev = to_klist_node(prev->n_node.prev);
+	}
+
+	spin_unlock(&i->i_klist->k_lock);
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
+}
+EXPORT_SYMBOL_GPL(klist_prev);
+
+/**
  * klist_next - Ante up next node in list.
  * @i: Iterator structure.
  *
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index bc0a1da8afba..95c52a95259e 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -146,18 +146,25 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 	uint8_t *p;
 	mpi_limb_t alimb;
 	unsigned int n = mpi_get_size(a);
-	int i;
+	int i, lzeros = 0;
 
-	if (buf_len < n || !buf)
+	if (buf_len < n || !buf || !nbytes)
 		return -EINVAL;
 
 	if (sign)
 		*sign = a->sign;
 
-	if (nbytes)
-		*nbytes = n;
+	p = (void *)&a->d[a->nlimbs] - 1;
+
+	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
+		if (!*p)
+			lzeros++;
+		else
+			break;
+	}
 
 	p = buf;
+	*nbytes = n - lzeros;
 
 	for (i = a->nlimbs - 1; i >= 0; i--) {
 		alimb = a->d[i];
@@ -178,6 +185,19 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 #else
 #error please implement for this limb size.
 #endif
+
+		if (lzeros > 0) {
+			if (lzeros >= sizeof(alimb)) {
+				p -= sizeof(alimb);
+			} else {
+				mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
+				mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+							+ lzeros;
+				*limb1 = *limb2;
+				p -= lzeros;
+			}
+			lzeros -= sizeof(alimb);
+		}
 	}
 	return 0;
 }
@@ -197,7 +217,7 @@ EXPORT_SYMBOL_GPL(mpi_read_buffer);
  */
 void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
 {
-	uint8_t *buf, *p;
+	uint8_t *buf;
 	unsigned int n;
 	int ret;
 
@@ -220,14 +240,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
 		kfree(buf);
 		return NULL;
 	}
-
-	/* this is sub-optimal but we need to do the shift operation
-	 * because the caller has to free the returned buffer */
-	for (p = buf; !*p && *nbytes; p++, --*nbytes)
-		;
-	if (p != buf)
-		memmove(buf, p, *nbytes);
-
 	return buf;
 }
 EXPORT_SYMBOL_GPL(mpi_get_buffer);
diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c
index bcce5f149310..5f5d24d1d53f 100644
--- a/lib/pci_iomap.c
+++ b/lib/pci_iomap.c
@@ -52,6 +52,51 @@ void __iomem *pci_iomap_range(struct pci_dev *dev,
 EXPORT_SYMBOL(pci_iomap_range);
 
 /**
+ * pci_iomap_wc_range - create a virtual WC mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @offset: map memory at the given offset in BAR
+ * @maxlen: max length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way. When possible write combining
+ * is used.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR from offset to the end, pass %0 here.
+ * */
+void __iomem *pci_iomap_wc_range(struct pci_dev *dev,
+				 int bar,
+				 unsigned long offset,
+				 unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+
+	if (flags & IORESOURCE_IO)
+		return NULL;
+
+	if (len <= offset || !start)
+		return NULL;
+
+	len -= offset;
+	start += offset;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+
+	if (flags & IORESOURCE_MEM)
+		return ioremap_wc(start, len);
+
+	/* What? */
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_iomap_wc_range);
+
+/**
  * pci_iomap - create a virtual mapping cookie for a PCI BAR
  * @dev: PCI device that owns the BAR
  * @bar: BAR number
@@ -70,4 +115,25 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 	return pci_iomap_range(dev, bar, 0, maxlen);
 }
 EXPORT_SYMBOL(pci_iomap);
+
+/**
+ * pci_iomap_wc - create a virtual WC mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way. When possible write combining
+ * is used.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	return pci_iomap_wc_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL_GPL(pci_iomap_wc);
 #endif /* CONFIG_PCI */
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index d105a9f56878..bafa9933fa76 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -105,16 +105,12 @@ EXPORT_SYMBOL(sg_nents_for_len);
  **/
 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 {
-#ifndef CONFIG_ARCH_HAS_SG_CHAIN
-	struct scatterlist *ret = &sgl[nents - 1];
-#else
 	struct scatterlist *sg, *ret = NULL;
 	unsigned int i;
 
 	for_each_sg(sgl, sg, nents, i)
 		ret = sg;
 
-#endif
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sgl[0].sg_magic != SG_MAGIC);
 	BUG_ON(!sg_is_last(ret));
diff --git a/lib/sg_split.c b/lib/sg_split.c
new file mode 100644
index 000000000000..b063410c3593
--- /dev/null
+++ b/lib/sg_split.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2015 Robert Jarzmik <robert.jarzmik@free.fr>
+ *
+ * Scatterlist splitting helpers.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct sg_splitter {
+	struct scatterlist *in_sg0;
+	int nents;
+	off_t skip_sg0;
+	unsigned int length_last_sg;
+
+	struct scatterlist *out_sg;
+};
+
+static int sg_calculate_split(struct scatterlist *in, int nents, int nb_splits,
+			      off_t skip, const size_t *sizes,
+			      struct sg_splitter *splitters, bool mapped)
+{
+	int i;
+	unsigned int sglen;
+	size_t size = sizes[0], len;
+	struct sg_splitter *curr = splitters;
+	struct scatterlist *sg;
+
+	for (i = 0; i < nb_splits; i++) {
+		splitters[i].in_sg0 = NULL;
+		splitters[i].nents = 0;
+	}
+
+	for_each_sg(in, sg, nents, i) {
+		sglen = mapped ? sg_dma_len(sg) : sg->length;
+		if (skip > sglen) {
+			skip -= sglen;
+			continue;
+		}
+
+		len = min_t(size_t, size, sglen - skip);
+		if (!curr->in_sg0) {
+			curr->in_sg0 = sg;
+			curr->skip_sg0 = skip;
+		}
+		size -= len;
+		curr->nents++;
+		curr->length_last_sg = len;
+
+		while (!size && (skip + len < sglen) && (--nb_splits > 0)) {
+			curr++;
+			size = *(++sizes);
+			skip += len;
+			len = min_t(size_t, size, sglen - skip);
+
+			curr->in_sg0 = sg;
+			curr->skip_sg0 = skip;
+			curr->nents = 1;
+			curr->length_last_sg = len;
+			size -= len;
+		}
+		skip = 0;
+
+		if (!size && --nb_splits > 0) {
+			curr++;
+			size = *(++sizes);
+		}
+
+		if (!nb_splits)
+			break;
+	}
+
+	return (size || !splitters[0].in_sg0) ? -EINVAL : 0;
+}
+
+static void sg_split_phys(struct sg_splitter *splitters, const int nb_splits)
+{
+	int i, j;
+	struct scatterlist *in_sg, *out_sg;
+	struct sg_splitter *split;
+
+	for (i = 0, split = splitters; i < nb_splits; i++, split++) {
+		in_sg = split->in_sg0;
+		out_sg = split->out_sg;
+		for (j = 0; j < split->nents; j++, out_sg++) {
+			*out_sg = *in_sg;
+			if (!j) {
+				out_sg->offset += split->skip_sg0;
+				out_sg->length -= split->skip_sg0;
+			} else {
+				out_sg->offset = 0;
+			}
+			sg_dma_address(out_sg) = 0;
+			sg_dma_len(out_sg) = 0;
+			in_sg = sg_next(in_sg);
+		}
+		out_sg[-1].length = split->length_last_sg;
+		sg_mark_end(out_sg - 1);
+	}
+}
+
+static void sg_split_mapped(struct sg_splitter *splitters, const int nb_splits)
+{
+	int i, j;
+	struct scatterlist *in_sg, *out_sg;
+	struct sg_splitter *split;
+
+	for (i = 0, split = splitters; i < nb_splits; i++, split++) {
+		in_sg = split->in_sg0;
+		out_sg = split->out_sg;
+		for (j = 0; j < split->nents; j++, out_sg++) {
+			sg_dma_address(out_sg) = sg_dma_address(in_sg);
+			sg_dma_len(out_sg) = sg_dma_len(in_sg);
+			if (!j) {
+				sg_dma_address(out_sg) += split->skip_sg0;
+				sg_dma_len(out_sg) -= split->skip_sg0;
+			}
+			in_sg = sg_next(in_sg);
+		}
+		sg_dma_len(--out_sg) = split->length_last_sg;
+	}
+}
+
+/**
+ * sg_split - split a scatterlist into several scatterlists
+ * @in: the input sg list
+ * @in_mapped_nents: the result of a dma_map_sg(in, ...), or 0 if not mapped.
+ * @skip: the number of bytes to skip in the input sg list
+ * @nb_splits: the number of desired sg outputs
+ * @split_sizes: the respective size of each output sg list in bytes
+ * @out: an array where to store the allocated output sg lists
+ * @out_mapped_nents: the resulting sg lists mapped number of sg entries. Might
+ *                    be NULL if sglist not already mapped (in_mapped_nents = 0)
+ * @gfp_mask: the allocation flag
+ *
+ * This function splits the input sg list into nb_splits sg lists, which are
+ * allocated and stored into out.
+ * The @in is split into :
+ *  - @out[0], which covers bytes [@skip .. @skip + @split_sizes[0] - 1] of @in
+ *  - @out[1], which covers bytes [@skip + split_sizes[0] ..
+ *                                 @skip + @split_sizes[0] + @split_sizes[1] -1]
+ * etc ...
+ * It will be the caller's duty to kfree() out array members.
+ *
+ * Returns 0 upon success, or error code
+ */
+int sg_split(struct scatterlist *in, const int in_mapped_nents,
+	     const off_t skip, const int nb_splits,
+	     const size_t *split_sizes,
+	     struct scatterlist **out, int *out_mapped_nents,
+	     gfp_t gfp_mask)
+{
+	int i, ret;
+	struct sg_splitter *splitters;
+
+	splitters = kcalloc(nb_splits, sizeof(*splitters), gfp_mask);
+	if (!splitters)
+		return -ENOMEM;
+
+	ret = sg_calculate_split(in, sg_nents(in), nb_splits, skip, split_sizes,
+			   splitters, false);
+	if (ret < 0)
+		goto err;
+
+	ret = -ENOMEM;
+	for (i = 0; i < nb_splits; i++) {
+		splitters[i].out_sg = kmalloc_array(splitters[i].nents,
+						    sizeof(struct scatterlist),
+						    gfp_mask);
+		if (!splitters[i].out_sg)
+			goto err;
+	}
+
+	/*
+	 * The order of these 3 calls is important and should be kept.
+	 */
+	sg_split_phys(splitters, nb_splits);
+	ret = sg_calculate_split(in, in_mapped_nents, nb_splits, skip,
+				 split_sizes, splitters, true);
+	if (ret < 0)
+		goto err;
+	sg_split_mapped(splitters, nb_splits);
+
+	for (i = 0; i < nb_splits; i++) {
+		out[i] = splitters[i].out_sg;
+		if (out_mapped_nents)
+			out_mapped_nents[i] = splitters[i].nents;
+	}
+
+	kfree(splitters);
+	return 0;
+
+err:
+	for (i = 0; i < nb_splits; i++)
+		kfree(splitters[i].out_sg);
+	kfree(splitters);
+	return ret;
+}
+EXPORT_SYMBOL(sg_split);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 7f58c735d745..d1377390b3ad 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -18,10 +18,12 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/random.h>
+#include <linux/highmem.h>
 
 /* General test specific settings */
 #define MAX_SUBTESTS	3
@@ -55,6 +57,7 @@
 /* Flags that can be passed to test cases */
 #define FLAG_NO_DATA		BIT(0)
 #define FLAG_EXPECTED_FAIL	BIT(1)
+#define FLAG_SKB_FRAG		BIT(2)
 
 enum {
 	CLASSIC  = BIT(6),	/* Old BPF instructions only. */
@@ -80,6 +83,7 @@ struct bpf_test {
 		__u32 result;
 	} test[MAX_SUBTESTS];
 	int (*fill_helper)(struct bpf_test *self);
+	__u8 frag_data[MAX_DATA];
 };
 
 /* Large test cases need separate allocation and fill handler. */
@@ -355,6 +359,81 @@ static int bpf_fill_ja(struct bpf_test *self)
 	return __bpf_fill_ja(self, 12, 9);
 }
 
+static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
+{
+	unsigned int len = BPF_MAXINSNS;
+	struct sock_filter *insn;
+	int i;
+
+	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+	if (!insn)
+		return -ENOMEM;
+
+	for (i = 0; i < len - 1; i += 2) {
+		insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0);
+		insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+					 SKF_AD_OFF + SKF_AD_CPU);
+	}
+
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee);
+
+	self->u.ptr.insns = insn;
+	self->u.ptr.len = len;
+
+	return 0;
+}
+
+#define PUSH_CNT 68
+/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+	unsigned int len = BPF_MAXINSNS;
+	struct bpf_insn *insn;
+	int i = 0, j, k = 0;
+
+	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+	if (!insn)
+		return -ENOMEM;
+
+	insn[i++] = BPF_MOV64_REG(R6, R1);
+loop:
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(R1, R6);
+		insn[i++] = BPF_MOV64_IMM(R2, 1);
+		insn[i++] = BPF_MOV64_IMM(R3, 2);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 bpf_skb_vlan_push_proto.func - __bpf_call_base);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
+		i++;
+	}
+
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(R1, R6);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 bpf_skb_vlan_pop_proto.func - __bpf_call_base);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
+		i++;
+	}
+	if (++k < 5)
+		goto loop;
+
+	for (; i < len - 1; i++)
+		insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef);
+
+	insn[len - 1] = BPF_EXIT_INSN();
+
+	self->u.ptr.insns = insn;
+	self->u.ptr.len = len;
+
+	return 0;
+}
+
 static struct bpf_test tests[] = {
 	{
 		"TAX",
@@ -3674,6 +3753,9 @@ static struct bpf_test tests[] = {
 		.u.insns_int = {
 			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
 			BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+			BPF_ALU64_REG(BPF_MOV, R1, R0),
+			BPF_ALU64_IMM(BPF_RSH, R1, 32),
+			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
 			BPF_EXIT_INSN(),
 		},
 		INTERNAL,
@@ -3708,6 +3790,9 @@ static struct bpf_test tests[] = {
 		.u.insns_int = {
 			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
 			BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+			BPF_ALU64_REG(BPF_MOV, R1, R0),
+			BPF_ALU64_IMM(BPF_RSH, R1, 32),
+			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
 			BPF_EXIT_INSN(),
 		},
 		INTERNAL,
@@ -4392,6 +4477,618 @@ static struct bpf_test tests[] = {
 		{ { 0, 0xababcbac } },
 		.fill_helper = bpf_fill_maxinsns11,
 	},
+	{
+		"BPF_MAXINSNS: ld_abs+get_processor_id",
+		{ },
+		CLASSIC,
+		{ },
+		{ { 1, 0xbee } },
+		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+	},
+	{
+		"BPF_MAXINSNS: ld_abs+vlan_push/pop",
+		{ },
+		INTERNAL,
+		{ 0x34 },
+		{ { 1, 0xbef } },
+		.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+	},
+	/*
+	 * LD_IND / LD_ABS on fragmented SKBs
+	 */
+	{
+		"LD_IND byte frag",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x40),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x42} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_IND halfword frag",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x40),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x4),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x4344} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_IND word frag",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x40),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x8),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x21071983} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_IND halfword mixed head/frag",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x40),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ [0x3e] = 0x25, [0x3f] = 0x05, },
+		{ {0x40, 0x0519} },
+		.frag_data = { 0x19, 0x82 },
+	},
+	{
+		"LD_IND word mixed head/frag",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x40),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x2),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ [0x3e] = 0x25, [0x3f] = 0x05, },
+		{ {0x40, 0x25051982} },
+		.frag_data = { 0x19, 0x82 },
+	},
+	{
+		"LD_ABS byte frag",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x40),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x42} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_ABS halfword frag",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x44),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x4344} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_ABS word frag",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x48),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ },
+		{ {0x40, 0x21071983} },
+		.frag_data = {
+			0x42, 0x00, 0x00, 0x00,
+			0x43, 0x44, 0x00, 0x00,
+			0x21, 0x07, 0x19, 0x83,
+		},
+	},
+	{
+		"LD_ABS halfword mixed head/frag",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ [0x3e] = 0x25, [0x3f] = 0x05, },
+		{ {0x40, 0x0519} },
+		.frag_data = { 0x19, 0x82 },
+	},
+	{
+		"LD_ABS word mixed head/frag",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_SKB_FRAG,
+		{ [0x3e] = 0x25, [0x3f] = 0x05, },
+		{ {0x40, 0x25051982} },
+		.frag_data = { 0x19, 0x82 },
+	},
+	/*
+	 * LD_IND / LD_ABS on non fragmented SKBs
+	 */
+	{
+		/*
+		 * this tests that the JIT/interpreter correctly resets X
+		 * before using it in an LD_IND instruction.
+		 */
+		"LD_IND byte default X",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x1] = 0x42 },
+		{ {0x40, 0x42 } },
+	},
+	{
+		"LD_IND byte positive offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 } },
+	},
+	{
+		"LD_IND byte negative offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x05 } },
+	},
+	{
+		"LD_IND halfword positive offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x2),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+		},
+		{ {0x40, 0xdd88 } },
+	},
+	{
+		"LD_IND halfword negative offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x2),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+		},
+		{ {0x40, 0xbb66 } },
+	},
+	{
+		"LD_IND halfword unaligned",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+		},
+		{ {0x40, 0x66cc } },
+	},
+	{
+		"LD_IND word positive offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x4),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xee99ffaa } },
+	},
+	{
+		"LD_IND word negative offset",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x4),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xaa55bb66 } },
+	},
+	{
+		"LD_IND word unaligned (addr & 3 == 2)",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x2),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xbb66cc77 } },
+	},
+	{
+		"LD_IND word unaligned (addr & 3 == 1)",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0x55bb66cc } },
+	},
+	{
+		"LD_IND word unaligned (addr & 3 == 3)",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0x66cc77dd } },
+	},
+	{
+		"LD_ABS byte",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xcc } },
+	},
+	{
+		"LD_ABS halfword",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xdd88 } },
+	},
+	{
+		"LD_ABS halfword unaligned",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x25),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0x99ff } },
+	},
+	{
+		"LD_ABS word",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xaa55bb66 } },
+	},
+	{
+		"LD_ABS word unaligned (addr & 3 == 2)",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x22),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0xdd88ee99 } },
+	},
+	{
+		"LD_ABS word unaligned (addr & 3 == 1)",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x21),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0x77dd88ee } },
+	},
+	{
+		"LD_ABS word unaligned (addr & 3 == 3)",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x23),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{
+			[0x1c] = 0xaa, [0x1d] = 0x55,
+			[0x1e] = 0xbb, [0x1f] = 0x66,
+			[0x20] = 0xcc, [0x21] = 0x77,
+			[0x22] = 0xdd, [0x23] = 0x88,
+			[0x24] = 0xee, [0x25] = 0x99,
+			[0x26] = 0xff, [0x27] = 0xaa,
+		},
+		{ {0x40, 0x88ee99ff } },
+	},
+	/*
+	 * verify that the interpreter or JIT correctly sets A and X
+	 * to 0.
+	 */
+	{
+		"ADD default X",
+		.u.insns = {
+			/*
+			 * A = 0x42
+			 * A = A + X
+			 * ret A
+			 */
+			BPF_STMT(BPF_LD | BPF_IMM, 0x42),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x42 } },
+	},
+	{
+		"ADD default A",
+		.u.insns = {
+			/*
+			 * A = A + 0x42
+			 * ret A
+			 */
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 0x42),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x42 } },
+	},
+	{
+		"SUB default X",
+		.u.insns = {
+			/*
+			 * A = 0x66
+			 * A = A - X
+			 * ret A
+			 */
+			BPF_STMT(BPF_LD | BPF_IMM, 0x66),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x66 } },
+	},
+	{
+		"SUB default A",
+		.u.insns = {
+			/*
+			 * A = A - -0x66
+			 * ret A
+			 */
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, -0x66),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x66 } },
+	},
+	{
+		"MUL default X",
+		.u.insns = {
+			/*
+			 * A = 0x42
+			 * A = A * X
+			 * ret A
+			 */
+			BPF_STMT(BPF_LD | BPF_IMM, 0x42),
+			BPF_STMT(BPF_ALU | BPF_MUL | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x0 } },
+	},
+	{
+		"MUL default A",
+		.u.insns = {
+			/*
+			 * A = A * 0x66
+			 * ret A
+			 */
+			BPF_STMT(BPF_ALU | BPF_MUL | BPF_K, 0x66),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x0 } },
+	},
+	{
+		"DIV default X",
+		.u.insns = {
+			/*
+			 * A = 0x42
+			 * A = A / X ; this halt the filter execution if X is 0
+			 * ret 0x42
+			 */
+			BPF_STMT(BPF_LD | BPF_IMM, 0x42),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0x42),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x0 } },
+	},
+	{
+		"DIV default A",
+		.u.insns = {
+			/*
+			 * A = A / 1
+			 * ret A
+			 */
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x0 } },
+	},
+	{
+		"JMP EQ default A",
+		.u.insns = {
+			/*
+			 * cmp A, 0x0, 0, 1
+			 * ret 0x42
+			 * ret 0x66
+			 */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 0x42),
+			BPF_STMT(BPF_RET | BPF_K, 0x66),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x42 } },
+	},
+	{
+		"JMP EQ default X",
+		.u.insns = {
+			/*
+			 * A = 0x0
+			 * cmp A, X, 0, 1
+			 * ret 0x42
+			 * ret 0x66
+			 */
+			BPF_STMT(BPF_LD | BPF_IMM, 0x0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0x0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 0x42),
+			BPF_STMT(BPF_RET | BPF_K, 0x66),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ {0x1, 0x42 } },
+	},
 };
 
 static struct net_device dev;
@@ -4427,6 +5124,9 @@ static struct sk_buff *populate_skb(char *buf, int size)
 
 static void *generate_test_data(struct bpf_test *test, int sub)
 {
+	struct sk_buff *skb;
+	struct page *page;
+
 	if (test->aux & FLAG_NO_DATA)
 		return NULL;
 
@@ -4434,7 +5134,38 @@ static void *generate_test_data(struct bpf_test *test, int sub)
 	 * subtests generate skbs of different sizes based on
 	 * the same data.
 	 */
-	return populate_skb(test->data, test->test[sub].data_size);
+	skb = populate_skb(test->data, test->test[sub].data_size);
+	if (!skb)
+		return NULL;
+
+	if (test->aux & FLAG_SKB_FRAG) {
+		/*
+		 * when the test requires a fragmented skb, add a
+		 * single fragment to the skb, filled with
+		 * test->frag_data.
+		 */
+		void *ptr;
+
+		page = alloc_page(GFP_KERNEL);
+
+		if (!page)
+			goto err_kfree_skb;
+
+		ptr = kmap(page);
+		if (!ptr)
+			goto err_free_page;
+		memcpy(ptr, test->frag_data, MAX_DATA);
+		kunmap(page);
+		skb_add_rx_frag(skb, 0, page, 0, MAX_DATA, MAX_DATA);
+	}
+
+	return skb;
+
+err_free_page:
+	__free_page(page);
+err_kfree_skb:
+	kfree_skb(skb);
+	return NULL;
 }
 
 static void release_test_data(const struct bpf_test *test, void *data)
@@ -4515,6 +5246,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		}
 
 		fp->len = flen;
+		/* Type doesn't really matter here as long as it's not unspec. */
+		fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
 		memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
 
 		bpf_prog_select_runtime(fp);
@@ -4545,14 +5278,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
 	u64 start, finish;
 	int ret = 0, i;
 
-	start = ktime_to_us(ktime_get());
+	start = ktime_get_ns();
 
 	for (i = 0; i < runs; i++)
 		ret = BPF_PROG_RUN(fp, data);
 
-	finish = ktime_to_us(ktime_get());
+	finish = ktime_get_ns();
 
-	*duration = (finish - start) * 1000ULL;
+	*duration = finish - start;
 	do_div(*duration, runs);
 
 	return ret;
@@ -4572,6 +5305,11 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
 			break;
 
 		data = generate_test_data(test, i);
+		if (!data && !(test->aux & FLAG_NO_DATA)) {
+			pr_cont("data generation failed ");
+			err_cnt++;
+			break;
+		}
 		ret = __run_one(fp, data, runs, &duration);
 		release_test_data(test, data);
 
@@ -4587,10 +5325,73 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
 	return err_cnt;
 }
 
+static char test_name[64];
+module_param_string(test_name, test_name, sizeof(test_name), 0);
+
+static int test_id = -1;
+module_param(test_id, int, 0);
+
+static int test_range[2] = { 0, ARRAY_SIZE(tests) - 1 };
+module_param_array(test_range, int, NULL, 0);
+
+static __init int find_test_index(const char *test_name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (!strcmp(tests[i].descr, test_name))
+			return i;
+	}
+	return -1;
+}
+
 static __init int prepare_bpf_tests(void)
 {
 	int i;
 
+	if (test_id >= 0) {
+		/*
+		 * if a test_id was specified, use test_range to
+		 * cover only that test.
+		 */
+		if (test_id >= ARRAY_SIZE(tests)) {
+			pr_err("test_bpf: invalid test_id specified.\n");
+			return -EINVAL;
+		}
+
+		test_range[0] = test_id;
+		test_range[1] = test_id;
+	} else if (*test_name) {
+		/*
+		 * if a test_name was specified, find it and setup
+		 * test_range to cover only that test.
+		 */
+		int idx = find_test_index(test_name);
+
+		if (idx < 0) {
+			pr_err("test_bpf: no test named '%s' found.\n",
+			       test_name);
+			return -EINVAL;
+		}
+		test_range[0] = idx;
+		test_range[1] = idx;
+	} else {
+		/*
+		 * check that the supplied test_range is valid.
+		 */
+		if (test_range[0] >= ARRAY_SIZE(tests) ||
+		    test_range[1] >= ARRAY_SIZE(tests) ||
+		    test_range[0] < 0 || test_range[1] < 0) {
+			pr_err("test_bpf: test_range is out of bound.\n");
+			return -EINVAL;
+		}
+
+		if (test_range[1] < test_range[0]) {
+			pr_err("test_bpf: test_range is ending before it starts.\n");
+			return -EINVAL;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		if (tests[i].fill_helper &&
 		    tests[i].fill_helper(&tests[i]) < 0)
@@ -4610,6 +5411,11 @@ static __init void destroy_bpf_tests(void)
 	}
 }
 
+static bool exclude_test(int test_id)
+{
+	return test_id < test_range[0] || test_id > test_range[1];
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -4619,6 +5425,9 @@ static __init int test_bpf(void)
 		struct bpf_prog *fp;
 		int err;
 
+		if (exclude_test(i))
+			continue;
+
 		pr_info("#%d %s ", i, tests[i].descr);
 
 		fp = generate_filter(i, &err);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index c90777eae1f8..8c1ad1ced72c 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -16,10 +16,14 @@
 #include <linux/init.h>
 #include <linux/jhash.h>
 #include <linux/kernel.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/rhashtable.h>
+#include <linux/semaphore.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #define MAX_ENTRIES	1000000
 #define TEST_INSERT_FAIL INT_MAX
@@ -44,11 +48,21 @@ static int size = 8;
 module_param(size, int, 0);
 MODULE_PARM_DESC(size, "Initial size hint of table (default: 8)");
 
+static int tcount = 10;
+module_param(tcount, int, 0);
+MODULE_PARM_DESC(tcount, "Number of threads to spawn (default: 10)");
+
 struct test_obj {
 	int			value;
 	struct rhash_head	node;
 };
 
+struct thread_data {
+	int id;
+	struct task_struct *task;
+	struct test_obj *objs;
+};
+
 static struct test_obj array[MAX_ENTRIES];
 
 static struct rhashtable_params test_rht_params = {
@@ -59,6 +73,9 @@ static struct rhashtable_params test_rht_params = {
 	.nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
+static struct semaphore prestart_sem;
+static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
+
 static int __init test_rht_lookup(struct rhashtable *ht)
 {
 	unsigned int i;
@@ -87,6 +104,8 @@ static int __init test_rht_lookup(struct rhashtable *ht)
 				return -EINVAL;
 			}
 		}
+
+		cond_resched_rcu();
 	}
 
 	return 0;
@@ -160,6 +179,8 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 		} else if (err) {
 			return err;
 		}
+
+		cond_resched();
 	}
 
 	if (insert_fails)
@@ -183,6 +204,8 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 
 			rhashtable_remove_fast(ht, &obj->node, test_rht_params);
 		}
+
+		cond_resched();
 	}
 
 	end = ktime_get_ns();
@@ -193,10 +216,97 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 
 static struct rhashtable ht;
 
+static int thread_lookup_test(struct thread_data *tdata)
+{
+	int i, err = 0;
+
+	for (i = 0; i < entries; i++) {
+		struct test_obj *obj;
+		int key = (tdata->id << 16) | i;
+
+		obj = rhashtable_lookup_fast(&ht, &key, test_rht_params);
+		if (obj && (tdata->objs[i].value == TEST_INSERT_FAIL)) {
+			pr_err("  found unexpected object %d\n", key);
+			err++;
+		} else if (!obj && (tdata->objs[i].value != TEST_INSERT_FAIL)) {
+			pr_err("  object %d not found!\n", key);
+			err++;
+		} else if (obj && (obj->value != key)) {
+			pr_err("  wrong object returned (got %d, expected %d)\n",
+			       obj->value, key);
+			err++;
+		}
+	}
+	return err;
+}
+
+static int threadfunc(void *data)
+{
+	int i, step, err = 0, insert_fails = 0;
+	struct thread_data *tdata = data;
+
+	up(&prestart_sem);
+	if (down_interruptible(&startup_sem))
+		pr_err("  thread[%d]: down_interruptible failed\n", tdata->id);
+
+	for (i = 0; i < entries; i++) {
+		tdata->objs[i].value = (tdata->id << 16) | i;
+		err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
+		                             test_rht_params);
+		if (err == -ENOMEM || err == -EBUSY) {
+			tdata->objs[i].value = TEST_INSERT_FAIL;
+			insert_fails++;
+		} else if (err) {
+			pr_err("  thread[%d]: rhashtable_insert_fast failed\n",
+			       tdata->id);
+			goto out;
+		}
+	}
+	if (insert_fails)
+		pr_info("  thread[%d]: %d insert failures\n",
+		        tdata->id, insert_fails);
+
+	err = thread_lookup_test(tdata);
+	if (err) {
+		pr_err("  thread[%d]: rhashtable_lookup_test failed\n",
+		       tdata->id);
+		goto out;
+	}
+
+	for (step = 10; step > 0; step--) {
+		for (i = 0; i < entries; i += step) {
+			if (tdata->objs[i].value == TEST_INSERT_FAIL)
+				continue;
+			err = rhashtable_remove_fast(&ht, &tdata->objs[i].node,
+			                             test_rht_params);
+			if (err) {
+				pr_err("  thread[%d]: rhashtable_remove_fast failed\n",
+				       tdata->id);
+				goto out;
+			}
+			tdata->objs[i].value = TEST_INSERT_FAIL;
+		}
+		err = thread_lookup_test(tdata);
+		if (err) {
+			pr_err("  thread[%d]: rhashtable_lookup_test (2) failed\n",
+			       tdata->id);
+			goto out;
+		}
+	}
+out:
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	return err;
+}
+
 static int __init test_rht_init(void)
 {
-	int i, err;
+	int i, err, started_threads = 0, failed_threads = 0;
 	u64 total_time = 0;
+	struct thread_data *tdata;
+	struct test_obj *objs;
 
 	entries = min(entries, MAX_ENTRIES);
 
@@ -232,6 +342,57 @@ static int __init test_rht_init(void)
 	do_div(total_time, runs);
 	pr_info("Average test time: %llu\n", total_time);
 
+	if (!tcount)
+		return 0;
+
+	pr_info("Testing concurrent rhashtable access from %d threads\n",
+	        tcount);
+	sema_init(&prestart_sem, 1 - tcount);
+	tdata = vzalloc(tcount * sizeof(struct thread_data));
+	if (!tdata)
+		return -ENOMEM;
+	objs  = vzalloc(tcount * entries * sizeof(struct test_obj));
+	if (!objs) {
+		vfree(tdata);
+		return -ENOMEM;
+	}
+
+	err = rhashtable_init(&ht, &test_rht_params);
+	if (err < 0) {
+		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
+			err);
+		vfree(tdata);
+		vfree(objs);
+		return -EINVAL;
+	}
+	for (i = 0; i < tcount; i++) {
+		tdata[i].id = i;
+		tdata[i].objs = objs + i * entries;
+		tdata[i].task = kthread_run(threadfunc, &tdata[i],
+		                            "rhashtable_thrad[%d]", i);
+		if (IS_ERR(tdata[i].task))
+			pr_err(" kthread_run failed for thread %d\n", i);
+		else
+			started_threads++;
+	}
+	if (down_interruptible(&prestart_sem))
+		pr_err("  down interruptible failed\n");
+	for (i = 0; i < tcount; i++)
+		up(&startup_sem);
+	for (i = 0; i < tcount; i++) {
+		if (IS_ERR(tdata[i].task))
+			continue;
+		if ((err = kthread_stop(tdata[i].task))) {
+			pr_warn("Test failed: thread %d returned: %d\n",
+			        i, err);
+			failed_threads++;
+		}
+	}
+	pr_info("Started %d threads, %d failed\n",
+	        started_threads, failed_threads);
+	rhashtable_destroy(&ht);
+	vfree(tdata);
+	vfree(objs);
 	return 0;
 }
 
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index da39c608a28c..95cd63b43b99 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,6 +17,7 @@
  */
 
 #include <stdarg.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/module.h>	/* for KSYM_SYMBOL_LEN */
 #include <linux/types.h>