From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sat, 23 Aug 2014 17:03:28 +0200
Subject: random32: improvements to prandom_bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch addresses a couple of minor items, mostly addesssing
prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t
for length arguments, 2) We can use put_unaligned() when filling
the array instead of open coding it [ perhaps some archs will
further benefit from their own arch specific implementation when
GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned
int as type for getting the arch seed, 5) Make use of
prandom_u32_max() for timer slack.

Regarding the change to put_unaligned(), callers of prandom_bytes()
which internally invoke prandom_bytes_state(), don't bother as
they expect the array to be filled randomly and don't have any
control of the internal state what-so-ever (that's also why we
have periodic reseeding there, etc), so they really don't care.

Now for the direct callers of prandom_bytes_state(), which
are solely located in test cases for MTD devices, that is,
drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}:

These tests basically fill a test write-vector through
prandom_bytes_state() with an a-priori defined seed each time
and write that to a MTD device. Later on, they set up a read-vector
and read back that blocks from the device. So in the verification
phase, the write-vector is being re-setup [ so same seed and
prandom_bytes_state() called ], and then memcmp()'ed against the
read-vector to check if the data is the same.

Akinobu, Lothar and I also tested this patch and it runs through
the 3 relevant MTD test cases w/o any errors on the nandsim device
(simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53
and i.MX6):

  # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \
                     third_id_byte=0x00 fourth_id_byte=0x15
  # modprobe mtd_oobtest dev=0
  # modprobe mtd_pagetest dev=0
  # modprobe mtd_subpagetest dev=0

We also don't have any users depending directly on a particular
result of the PRNG (except the PRNG self-test itself), and that's
just fine as it e.g. allowed us easily to do things like upgrading
from taus88 to taus113.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
Tested-by: Lothar Waßmann <LW@KARO-electronics.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/random32.c | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/random32.c b/lib/random32.c
index c9b6bf3afe0c..0bee183fa18f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -37,6 +37,7 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <asm/unaligned.h>
 
 #ifdef CONFIG_RANDOM32_SELFTEST
 static void __init prandom_state_selftest(void);
@@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32);
  *	This is used for pseudo-randomness with no outside seeding.
  *	For more random results, use prandom_bytes().
  */
-void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
 {
-	unsigned char *p = buf;
-	int i;
-
-	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
-		u32 random = prandom_u32_state(state);
-		int j;
+	u8 *ptr = buf;
 
-		for (j = 0; j < sizeof(u32); j++) {
-			p[i + j] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	while (bytes >= sizeof(u32)) {
+		put_unaligned(prandom_u32_state(state), (u32 *) ptr);
+		ptr += sizeof(u32);
+		bytes -= sizeof(u32);
 	}
-	if (i < bytes) {
-		u32 random = prandom_u32_state(state);
 
-		for (; i < bytes; i++) {
-			p[i] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	if (bytes > 0) {
+		u32 rem = prandom_u32_state(state);
+		do {
+			*ptr++ = (u8) rem;
+			bytes--;
+			rem >>= BITS_PER_BYTE;
+		} while (bytes > 0);
 	}
 }
 EXPORT_SYMBOL(prandom_bytes_state);
@@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state);
  *	@buf: where to copy the pseudo-random bytes to
  *	@bytes: the requested number of bytes
  */
-void prandom_bytes(void *buf, int bytes)
+void prandom_bytes(void *buf, size_t bytes)
 {
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
 
@@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes);
 
 static void prandom_warmup(struct rnd_state *state)
 {
-	/* Calling RNG ten times to satify recurrence condition */
+	/* Calling RNG ten times to satisfy recurrence condition */
 	prandom_u32_state(state);
 	prandom_u32_state(state);
 	prandom_u32_state(state);
@@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state)
 
 static u32 __extract_hwseed(void)
 {
-	u32 val = 0;
+	unsigned int val = 0;
 
 	(void)(arch_get_random_seed_int(&val) ||
 	       arch_get_random_int(&val));
@@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare)
 	prandom_seed(entropy);
 
 	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-	expires = 40 + (prandom_u32() % 40);
+	expires = 40 + prandom_u32_max(40);
 	seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
 
 	add_timer(&seed_timer);
-- 
cgit v1.2.3


From 72b603ee8cfc6be587f301568d79ce38e7ed735d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Mon, 25 Aug 2014 12:27:02 -0700
Subject: bpf: x86: add missing 'shift by register' instructions to x64 eBPF
 JIT

'shift by register' operations are supported by eBPF interpreter, but were
accidently left out of x64 JIT compiler. Fix it and add a testcase.

Reported-by: Brendan Gregg <brendan.d.gregg@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 89e0345733bd..8c66c6aace04 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1341,6 +1341,44 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, -1 } }
 	},
+	{
+		"INT: shifts by register",
+		.u.insns_int = {
+			BPF_MOV64_IMM(R0, -1234),
+			BPF_MOV64_IMM(R1, 1),
+			BPF_ALU32_REG(BPF_RSH, R0, R1),
+			BPF_JMP_IMM(BPF_JEQ, R0, 0x7ffffd97, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R2, 1),
+			BPF_ALU64_REG(BPF_LSH, R0, R2),
+			BPF_MOV32_IMM(R4, -1234),
+			BPF_JMP_REG(BPF_JEQ, R0, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_AND, R4, 63),
+			BPF_ALU64_REG(BPF_LSH, R0, R4), /* R0 <= 46 */
+			BPF_MOV64_IMM(R3, 47),
+			BPF_ALU64_REG(BPF_ARSH, R0, R3),
+			BPF_JMP_IMM(BPF_JEQ, R0, -617, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R2, 1),
+			BPF_ALU64_REG(BPF_LSH, R4, R2), /* R4 = 46 << 1 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 92, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R4, 4),
+			BPF_ALU64_REG(BPF_LSH, R4, R4), /* R4 = 4 << 4 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 64, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R4, 5),
+			BPF_ALU32_REG(BPF_LSH, R4, R4), /* R4 = 5 << 5 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 160, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R0, -1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } }
+	},
 	{
 		"INT: DIV + ABS",
 		.u.insns_int = {
-- 
cgit v1.2.3


From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 3 Sep 2014 09:22:36 +0800
Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking

Although rhashtable library allows user to specify a quiet big size
for user's created hash table, the table may be shrunk to a
very small size - HASH_MIN_SIZE(4) after object is removed from
the table at the first time. Subsequently, even if the total amount
of objects saved in the table is quite lower than user's initial
setting in a long time, the hash table size is still dynamically
adjusted by rhashtable_shrink() or rhashtable_expand() each time
object is inserted or removed from the table. However, as
synchronize_rcu() has to be called when table is shrunk or
expanded by the two functions, we should permit user to set the
minimum table size through configuring the minimum number of shifts
according to user specific requirement, avoiding these expensive
actions of shrinking or expanding because of calling synchronize_rcu().

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index a2c78810ebc1..8dfec3f26d4c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	if (tbl->size <= HASH_MIN_SIZE)
+	if (ht->shift <= ht->p.min_shift)
 		return 0;
 
 	ntbl = bucket_table_alloc(tbl->size / 2, flags);
@@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash,
 }
 EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
 
-static size_t rounded_hashtable_size(unsigned int nelem)
+static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE);
+	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+		   1UL << params->min_shift);
 }
 
 /**
@@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	    (!params->key_len && !params->obj_hashfn))
 		return -EINVAL;
 
+	params->min_shift = max_t(size_t, params->min_shift,
+				  ilog2(HASH_MIN_SIZE));
+
 	if (params->nelem_hint)
-		size = rounded_hashtable_size(params->nelem_hint);
+		size = rounded_hashtable_size(params);
 
 	tbl = bucket_table_alloc(size, GFP_KERNEL);
 	if (tbl == NULL)
-- 
cgit v1.2.3


From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 2 Sep 2014 22:53:44 +0200
Subject: net: bpf: make eBPF interpreter images read-only

With eBPF getting more extended and exposure to user space is on it's way,
hardening the memory range the interpreter uses to steer its command flow
seems appropriate.  This patch moves the to be interpreted bytecode to
read-only pages.

In case we execute a corrupted BPF interpreter image for some reason e.g.
caused by an attacker which got past a verifier stage, it would not only
provide arbitrary read/write memory access but arbitrary function calls
as well. After setting up the BPF interpreter image, its contents do not
change until destruction time, thus we can setup the image on immutable
made pages in order to mitigate modifications to that code. The idea
is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks").

This is possible because bpf_prog is not part of sk_filter anymore.
After setup bpf_prog cannot be altered during its life-time. This prevents
any modifications to the entire bpf_prog structure (incl. function/JIT
image pointer).

Every eBPF program (including classic BPF that are migrated) have to call
bpf_prog_select_runtime() to select either interpreter or a JIT image
as a last setup step, and they all are being freed via bpf_prog_free(),
including non-JIT. Therefore, we can easily integrate this into the
eBPF life-time, plus since we directly allocate a bpf_prog, we have no
performance penalty.

Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual
inspection of kernel_page_tables.  Brad Spengler proposed the same idea
via Twitter during development of this patch.

Joint work with Hannes Frederic Sowa.

Suggested-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8c66c6aace04..9a67456ba29a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		break;
 
 	case INTERNAL:
-		fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL);
+		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
 		if (fp == NULL) {
 			pr_cont("UNEXPECTED_FAIL no memory left\n");
 			*err = -ENOMEM;
-- 
cgit v1.2.3


From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:17 -0700
Subject: net: filter: add "load 64-bit immediate" eBPF instruction

add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register.
All previous instructions were 8-byte. This is first 16-byte instruction.
Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction:
insn[0].code = BPF_LD | BPF_DW | BPF_IMM
insn[0].dst_reg = destination register
insn[0].imm = lower 32-bit
insn[1].code = 0
insn[1].imm = upper 32-bit
All unused fields must be zero.

Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM
which loads 32-bit immediate value into a register.

x64 JITs it as single 'movabsq %rax, imm64'
arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn

Note that old eBPF programs are binary compatible with new interpreter.

It helps eBPF programs load 64-bit constant into a register with one
instruction instead of using two registers and 4 instructions:
BPF_MOV32_IMM(R1, imm32)
BPF_ALU64_IMM(BPF_LSH, R1, 32)
BPF_MOV32_IMM(R2, imm32)
BPF_ALU64_REG(BPF_OR, R1, R2)

User space generated programs will use this instruction to load constants only.

To tell kernel that user space needs a pointer the _pseudo_ variant of
this instruction may be added later, which will use extra bits of encoding
to indicate what type of pointer user space is asking kernel to provide.
For example 'off' or 'src_reg' fields can be used for such purpose.
src_reg = 1 could mean that user space is asking kernel to validate and
load in-kernel map pointer.
src_reg = 2 could mean that user space needs readonly data section pointer
src_reg = 3 could mean that user space needs a pointer to per-cpu local data
All such future pseudo instructions will not be carrying the actual pointer
as part of the instruction, but rather will be treated as a request to kernel
to provide one. The kernel will verify the request_for_a_pointer, then
will drop _pseudo_ marking and will store actual internal pointer inside
the instruction, so the end result is the interpreter and JITs never
see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that
loads 64-bit immediate into a register. User space never operates on direct
pointers and verifier can easily recognize request_for_pointer vs other
instructions.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 9a67456ba29a..413890815d3e 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 1, 0 } },
 	},
+	{
+		"load 64-bit immediate",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x567800001234L),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_ALU64_IMM(BPF_LSH, R3, 32),
+			BPF_ALU64_IMM(BPF_RSH, R3, 32),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
 };
 
 static struct net_device dev;
-- 
cgit v1.2.3


From 25ee7327d04bc3ff41a7a5ac42d74226f8d60ac6 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 19 Sep 2014 13:53:51 -0700
Subject: net: bpf: fix compiler warnings in test_bpf

old gcc 4.2 used by avr32 architecture produces warnings:

lib/test_bpf.c:1741: warning: integer constant is too large for 'long' type
lib/test_bpf.c:1741: warning: integer constant is too large for 'long' type
lib/test_bpf.c: In function '__run_one':
lib/test_bpf.c:1897: warning: 'ret' may be used uninitialized in this function

silence these warnings.

Fixes: 02ab695bb37e ("net: filter: add "load 64-bit immediate" eBPF instruction")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 413890815d3e..23e070bcf72d 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1738,7 +1738,7 @@ static struct bpf_test tests[] = {
 	{
 		"load 64-bit immediate",
 		.u.insns_int = {
-			BPF_LD_IMM64(R1, 0x567800001234L),
+			BPF_LD_IMM64(R1, 0x567800001234LL),
 			BPF_MOV64_REG(R2, R1),
 			BPF_MOV64_REG(R3, R2),
 			BPF_ALU64_IMM(BPF_RSH, R2, 32),
@@ -1894,7 +1894,7 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
 		     int runs, u64 *duration)
 {
 	u64 start, finish;
-	int ret, i;
+	int ret = 0, i;
 
 	start = ktime_to_us(ktime_get());
 
-- 
cgit v1.2.3


From 3c731eba48e1b0650decfc91a839b80f0e05ce8f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:07 -0700
Subject: bpf: mini eBPF library, test stubs and verifier testsuite

1.
the library includes a trivial set of BPF syscall wrappers:
int bpf_create_map(int key_size, int value_size, int max_entries);
int bpf_update_elem(int fd, void *key, void *value);
int bpf_lookup_elem(int fd, void *key, void *value);
int bpf_delete_elem(int fd, void *key);
int bpf_get_next_key(int fd, void *key, void *next_key);
int bpf_prog_load(enum bpf_prog_type prog_type,
		  const struct sock_filter_int *insns, int insn_len,
		  const char *license);
bpf_prog_load() stores verifier log into global bpf_log_buf[] array

and BPF_*() macros to build instructions

2.
test stubs configure eBPF infra with 'unspec' map and program types.
These are fake types used by user space testsuite only.

3.
verifier tests valid and invalid programs and expects predefined
error log messages from kernel.
40 tests so far.

$ sudo ./test_verifier
 #0 add+sub+mul OK
 #1 unreachable OK
 #2 unreachable2 OK
 #3 out of range jump OK
 #4 out of range jump2 OK
 #5 test1 ld_imm64 OK
 ...

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a28590083622..3ac43f34437b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1672,7 +1672,8 @@ config TEST_BPF
 	  against the BPF interpreter or BPF JIT compiler depending on the
 	  current setting. This is in particular useful for BPF JIT compiler
 	  development, but also to run regression tests against changes in
-	  the interpreter code.
+	  the interpreter code. It also enables test stubs for eBPF maps and
+	  verifier used by user space verifier testsuite.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3