From 39b759cad5da88ee8ac55e186a00da4692b44e17 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 14 Nov 2017 13:15:33 -0700
Subject: tools/hv: add install target to Makefile

Makefiles usually come with 'install' target included so each distro
doesn't need to implement the procedure from scratch. Add it to tools/hv.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/hv/Makefile | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 31503819454d..1139d71fa0cf 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -7,9 +7,30 @@ CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
 
 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
 
-all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+sbindir ?= /usr/sbin
+libexecdir ?= /usr/libexec
+sharedstatedir ?= /var/lib
+
+ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+
+ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
+
+all: $(ALL_PROGRAMS)
+
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^
 
 clean:
 	$(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+
+install: all
+	install -d -m 755 $(DESTDIR)$(sbindir); \
+	install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
+	install -d -m 755 $(DESTDIR)$(sharedstatedir); \
+	for program in $(ALL_PROGRAMS); do \
+		install $$program -m 755 $(DESTDIR)$(sbindir);	\
+	done; \
+	install -m 755 lsvmbus $(DESTDIR)$(sbindir); \
+	for script in $(ALL_SCRIPTS); do \
+		install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
+	done
-- 
cgit v1.2.3


From 65c79230576873b312c3599479c1e42355c9f349 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 20 Nov 2017 09:45:35 -0800
Subject: test_firmware: fix setting old custom fw path back on exit

The file /sys/module/firmware_class/parameters/path can be used
to set a custom firmware path. The fw_filesystem.sh script creates
a temporary directory to add a test firmware file to be used during
testing, in order for this to work it uses the custom path syfs file
and it was supposed to reset back the file on execution exit. The
script failed to do this due to a typo, it was using OLD_PATH instead
of OLD_FWPATH, since its inception since v3.17.

Its not as easy to just keep the old setting, it turns out that
resetting an empty setting won't actually do what we want, we need
to check if it was empty and set an empty space.

Without this we end up having the temporary path always set after
we run these tests.

Fixes: 0a8adf58475 ("test: add firmware_class loader test")
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index b1f20fef36c7..f9508e1a4058 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -45,7 +45,10 @@ test_finish()
 	if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
 		echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
 	fi
-	echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path
+	if [ "$OLD_FWPATH" = "" ]; then
+		OLD_FWPATH=" "
+	fi
+	echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
 	rm -f "$FW"
 	rmdir "$FWPATH"
 }
-- 
cgit v1.2.3


From 881c23de02fe7d64489577fcc95912fdf724d966 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 20 Nov 2017 10:23:59 -0800
Subject: test_firmware: wrap sysfs timeout test into helper

This cannot run on all kernel builds. This will help us later
skip this test on kernel configs where non-applicable.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_fallback.sh | 69 +++++++++++++------------
 1 file changed, 37 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 34a42c68ebfb..e364631837d6 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -175,39 +175,44 @@ trap "test_finish" EXIT
 echo "ABCD0123" >"$FW"
 NAME=$(basename "$FW")
 
-DEVPATH="$DIR"/"nope-$NAME"/loading
-
-# Test failure when doing nothing (timeout works).
-echo -n 2 >/sys/class/firmware/timeout
-echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
-
-# Give the kernel some time to load the loading file, must be less
-# than the timeout above.
-sleep 1
-if [ ! -f $DEVPATH ]; then
-	echo "$0: fallback mechanism immediately cancelled"
-	echo ""
-	echo "The file never appeared: $DEVPATH"
-	echo ""
-	echo "This might be a distribution udev rule setup by your distribution"
-	echo "to immediately cancel all fallback requests, this must be"
-	echo "removed before running these tests. To confirm look for"
-	echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
-	echo "and see if you have something like this:"
-	echo ""
-	echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
-	echo ""
-	echo "If you do remove this file or comment out this line before"
-	echo "proceeding with these tests."
-	exit 1
-fi
+test_syfs_timeout()
+{
+	DEVPATH="$DIR"/"nope-$NAME"/loading
+
+	# Test failure when doing nothing (timeout works).
+	echo -n 2 >/sys/class/firmware/timeout
+	echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+	# Give the kernel some time to load the loading file, must be less
+	# than the timeout above.
+	sleep 1
+	if [ ! -f $DEVPATH ]; then
+		echo "$0: fallback mechanism immediately cancelled"
+		echo ""
+		echo "The file never appeared: $DEVPATH"
+		echo ""
+		echo "This might be a distribution udev rule setup by your distribution"
+		echo "to immediately cancel all fallback requests, this must be"
+		echo "removed before running these tests. To confirm look for"
+		echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+		echo "and see if you have something like this:"
+		echo ""
+		echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+		echo ""
+		echo "If you do remove this file or comment out this line before"
+		echo "proceeding with these tests."
+		exit 1
+	fi
 
-if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not expected to match" >&2
-	exit 1
-else
-	echo "$0: timeout works"
-fi
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not expected to match" >&2
+		exit 1
+	else
+		echo "$0: timeout works"
+	fi
+}
+
+test_syfs_timeout
 
 # Put timeout high enough for us to do work but not so long that failures
 # slow down this test too much.
-- 
cgit v1.2.3


From 59106c815895a6b3a33f296f856137090aa2b83e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 20 Nov 2017 10:24:00 -0800
Subject: test_firmware: wrap basic sysfs fallback tests into helper

These cannot run on all kernel builds. This will help us later
skip this test on kernel configs where non-applicable.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_fallback.sh | 78 +++++++++++++------------
 1 file changed, 41 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index e364631837d6..0d4527c5e8a4 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -212,37 +212,51 @@ test_syfs_timeout()
 	fi
 }
 
-test_syfs_timeout
+run_sysfs_main_tests()
+{
+	test_syfs_timeout
+	# Put timeout high enough for us to do work but not so long that failures
+	# slow down this test too much.
+	echo 4 >/sys/class/firmware/timeout
 
-# Put timeout high enough for us to do work but not so long that failures
-# slow down this test too much.
-echo 4 >/sys/class/firmware/timeout
+	# Load this script instead of the desired firmware.
+	load_fw "$NAME" "$0"
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not expected to match" >&2
+		exit 1
+	else
+		echo "$0: firmware comparison works"
+	fi
 
-# Load this script instead of the desired firmware.
-load_fw "$NAME" "$0"
-if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not expected to match" >&2
-	exit 1
-else
-	echo "$0: firmware comparison works"
-fi
+	# Do a proper load, which should work correctly.
+	load_fw "$NAME" "$FW"
+	if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not loaded" >&2
+		exit 1
+	else
+		echo "$0: fallback mechanism works"
+	fi
 
-# Do a proper load, which should work correctly.
-load_fw "$NAME" "$FW"
-if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not loaded" >&2
-	exit 1
-else
-	echo "$0: fallback mechanism works"
-fi
+	load_fw_cancel "nope-$NAME" "$FW"
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was expected to be cancelled" >&2
+		exit 1
+	else
+		echo "$0: cancelling fallback mechanism works"
+	fi
 
-load_fw_cancel "nope-$NAME" "$FW"
-if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was expected to be cancelled" >&2
-	exit 1
-else
-	echo "$0: cancelling fallback mechanism works"
-fi
+	set +e
+	load_fw_fallback_with_child "nope-signal-$NAME" "$FW"
+	if [ "$?" -eq 0 ]; then
+		echo "$0: SIGCHLD on sync ignored as expected" >&2
+	else
+		echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2
+		exit 1
+	fi
+	set -e
+}
+
+run_sysfs_main_tests
 
 if load_fw_custom "$NAME" "$FW" ; then
 	if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
@@ -262,14 +276,4 @@ if load_fw_custom_cancel "nope-$NAME" "$FW" ; then
 	fi
 fi
 
-set +e
-load_fw_fallback_with_child "nope-signal-$NAME" "$FW"
-if [ "$?" -eq 0 ]; then
-	echo "$0: SIGCHLD on sync ignored as expected" >&2
-else
-	echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2
-	exit 1
-fi
-set -e
-
 exit 0
-- 
cgit v1.2.3


From 82bdf495169f2e4c25c5bd0f9384b687d358ebac Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 20 Nov 2017 10:24:01 -0800
Subject: test_firmware: wrap custom sysfs load tests into helper

These can run on certain kernel configs. This will allow
us later to enable these tests under the right kernel
configurations.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_fallback.sh | 43 ++++++++++++++++---------
 1 file changed, 28 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 0d4527c5e8a4..722cad91df74 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -256,24 +256,37 @@ run_sysfs_main_tests()
 	set -e
 }
 
-run_sysfs_main_tests
+run_sysfs_custom_load_tests()
+{
+	if load_fw_custom "$NAME" "$FW" ; then
+		if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was not loaded" >&2
+			exit 1
+		else
+			echo "$0: custom fallback loading mechanism works"
+		fi
+	fi
 
-if load_fw_custom "$NAME" "$FW" ; then
-	if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
-		echo "$0: firmware was not loaded" >&2
-		exit 1
-	else
-		echo "$0: custom fallback loading mechanism works"
+	if load_fw_custom "$NAME" "$FW" ; then
+		if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was not loaded" >&2
+			exit 1
+		else
+			echo "$0: custom fallback loading mechanism works"
+		fi
 	fi
-fi
 
-if load_fw_custom_cancel "nope-$NAME" "$FW" ; then
-	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-		echo "$0: firmware was expected to be cancelled" >&2
-		exit 1
-	else
-		echo "$0: cancelling custom fallback mechanism works"
+	if load_fw_custom_cancel "nope-$NAME" "$FW" ; then
+		if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was expected to be cancelled" >&2
+			exit 1
+		else
+			echo "$0: cancelling custom fallback mechanism works"
+		fi
 	fi
-fi
+}
+
+run_sysfs_main_tests
+run_sysfs_custom_load_tests
 
 exit 0
-- 
cgit v1.2.3


From 6a28b446b7d2d5455080d5b772c50b99859d6cf5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Nov 2017 21:31:41 -0800
Subject: selftests/bpf: adjust test_align expected output

since verifier started to print liveness state of the registers
adjust expected output of test_align.
Now this test checks for both proper alignment handling by verifier
and correctness of liveness marks.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_align.c | 156 +++++++++++++++----------------
 1 file changed, 78 insertions(+), 78 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 8591c89c0828..fe916d29e166 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{1, "R1=ctx(id=0,off=0,imm=0)"},
 			{1, "R10=fp0"},
-			{1, "R3=inv2"},
-			{2, "R3=inv4"},
-			{3, "R3=inv8"},
-			{4, "R3=inv16"},
-			{5, "R3=inv32"},
+			{1, "R3_w=inv2"},
+			{2, "R3_w=inv4"},
+			{3, "R3_w=inv8"},
+			{4, "R3_w=inv16"},
+			{5, "R3_w=inv32"},
 		},
 	},
 	{
@@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{1, "R1=ctx(id=0,off=0,imm=0)"},
 			{1, "R10=fp0"},
-			{1, "R3=inv1"},
-			{2, "R3=inv2"},
-			{3, "R3=inv4"},
-			{4, "R3=inv8"},
-			{5, "R3=inv16"},
-			{6, "R3=inv1"},
-			{7, "R4=inv32"},
-			{8, "R4=inv16"},
-			{9, "R4=inv8"},
-			{10, "R4=inv4"},
-			{11, "R4=inv2"},
+			{1, "R3_w=inv1"},
+			{2, "R3_w=inv2"},
+			{3, "R3_w=inv4"},
+			{4, "R3_w=inv8"},
+			{5, "R3_w=inv16"},
+			{6, "R3_w=inv1"},
+			{7, "R4_w=inv32"},
+			{8, "R4_w=inv16"},
+			{9, "R4_w=inv8"},
+			{10, "R4_w=inv4"},
+			{11, "R4_w=inv2"},
 		},
 	},
 	{
@@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{1, "R1=ctx(id=0,off=0,imm=0)"},
 			{1, "R10=fp0"},
-			{1, "R3=inv4"},
-			{2, "R3=inv8"},
-			{3, "R3=inv10"},
-			{4, "R4=inv8"},
-			{5, "R4=inv12"},
-			{6, "R4=inv14"},
+			{1, "R3_w=inv4"},
+			{2, "R3_w=inv8"},
+			{3, "R3_w=inv10"},
+			{4, "R4_w=inv8"},
+			{5, "R4_w=inv12"},
+			{6, "R4_w=inv14"},
 		},
 	},
 	{
@@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{1, "R1=ctx(id=0,off=0,imm=0)"},
 			{1, "R10=fp0"},
-			{1, "R3=inv7"},
-			{2, "R3=inv7"},
-			{3, "R3=inv14"},
-			{4, "R3=inv56"},
+			{1, "R3_w=inv7"},
+			{2, "R3_w=inv7"},
+			{3, "R3_w=inv14"},
+			{4, "R3_w=inv56"},
 		},
 	},
 
@@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
 			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
-			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
 			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
-			{18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
-			{20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-			{21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+			{20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
 		},
 	},
 	{
@@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
 		},
 	},
 	{
@@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
-			{5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
-			{6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
+			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+			{5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+			{6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
 			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
 			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
-			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
-			{15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
 		},
 	},
 	{
@@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = {
 			 * alignment of 4.
 			 */
 			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
-			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
 			 */
-			{11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
 			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
 			 * reg->aux_off (14) which is 16.  Then the variable
@@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = {
 			/* Variable offset is added to R5 packet pointer,
 			 * resulting in auxiliary alignment of 4.
 			 */
-			{18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant offset is added to R5, resulting in
 			 * reg->off of 14.
 			 */
-			{19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off
 			 * (14) which is 16.  Then the variable offset is 4-byte
@@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = {
 			/* Constant offset is added to R5 packet pointer,
 			 * resulting in reg->off value of 14.
 			 */
-			{26, "R5=pkt(id=0,off=14,r=8"},
+			{26, "R5_w=pkt(id=0,off=14,r=8"},
 			/* Variable offset is added to R5, resulting in a
 			 * variable offset of (4n).
 			 */
-			{27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant is added to R5 again, setting reg->off to 18. */
-			{28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* And once more we add a variable; resulting var_off
 			 * is still (4n), fixed offset is not changed.
 			 * Also, we create a new reg->id.
 			 */
-			{29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
 			 * which is 20.  Then the variable offset is (4n), so
@@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = {
 			 * alignment of 4.
 			 */
 			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
-			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* Packet pointer has (4n+2) offset */
-			{11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = {
 			/* Newly read value in R6 was shifted left by 2, so has
 			 * known alignment of 4.
 			 */
-			{18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Added (4n) to packet pointer's (4n+2) var_off, giving
 			 * another (4n+2).
 			 */
-			{19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
 			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -473,11 +473,11 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
 		.matches = {
-			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
 			/* ptr & 0x40 == either 0 or 0x40 */
-			{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
+			{5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
 			/* ptr << 2 == unknown, (4n) */
-			{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			{7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
@@ -485,7 +485,7 @@ static struct bpf_align_test tests[] = {
 			/* Checked s>=0 */
 			{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* packet pointer + nonnegative (4n+2) */
-			{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
@@ -530,11 +530,11 @@ static struct bpf_align_test tests[] = {
 			 * alignment of 4.
 			 */
 			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
-			{9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* New unknown value in R7 is (4n) */
-			{11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Subtracting it from R6 blows our unsigned bounds */
 			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
 			/* Checked s>= 0 */
@@ -583,15 +583,15 @@ static struct bpf_align_test tests[] = {
 			 * alignment of 4.
 			 */
 			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
-			{10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
 			/* New unknown value in R7 is (4n), >= 76 */
-			{15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
-- 
cgit v1.2.3


From 417ec26477a5c19abc72dd0298f48ebe5d2db43a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 1 Dec 2017 15:09:00 -0800
Subject: selftests/bpf: add offload test based on netdevsim

Add a test of BPF offload control path interfaces based on
just-added netdevsim driver.  Perform various checks of both
the stack and the expected driver behaviour.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile        |   5 +-
 tools/testing/selftests/bpf/sample_ret0.c   |   7 +
 tools/testing/selftests/bpf/test_offload.py | 681 ++++++++++++++++++++++++++++
 3 files changed, 691 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/sample_ret0.c
 create mode 100755 tools/testing/selftests/bpf/test_offload.py

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 333a48655ee0..2c9d8c63c6fa 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -17,9 +17,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
-	sockmap_verdict_prog.o dev_cgroup.o
+	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o
 
-TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
+TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
+	test_offload.py
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c
new file mode 100644
index 000000000000..fec99750d6ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_ret0.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths. */
+int func()
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
new file mode 100755
index 000000000000..3914f7a4585a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -0,0 +1,681 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import json
+import os
+import pprint
+import subprocess
+import time
+
+logfile = None
+log_level = 1
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+
+def log_get_sec(level=0):
+    return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+    global log_level
+    log_level += add
+
+def log_level_dec(sub=1):
+    global log_level
+    log_level -= sub
+
+def log_level_set(level):
+    global log_level
+    log_level = level
+
+def log(header, data, level=None):
+    """
+    Output to an optional log.
+    """
+    if logfile is None:
+        return
+    if level is not None:
+        log_level_set(level)
+
+    if not isinstance(data, str):
+        data = pp.pformat(data)
+
+    if len(header):
+        logfile.write("\n" + log_get_sec() + " ")
+        logfile.write(header)
+    if len(header) and len(data.strip()):
+        logfile.write("\n")
+    logfile.write(data)
+
+def skip(cond, msg):
+    if not cond:
+        return
+    print("SKIP: " + msg)
+    log("SKIP: " + msg, "", level=1)
+    os.sys.exit(0)
+
+def fail(cond, msg):
+    if not cond:
+        return
+    print("FAIL: " + msg)
+    log("FAIL: " + msg, "", level=1)
+    os.sys.exit(1)
+
+def start_test(msg):
+    log(msg, "", level=1)
+    log_level_inc()
+    print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+    """
+    Run a command in subprocess and return tuple of (retval, stdout);
+    optionally return stderr as well as third value.
+    """
+    proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    if background:
+        msg = "%s START: %s" % (log_get_sec(1),
+                                datetime.now().strftime("%H:%M:%S.%f"))
+        log("BKG " + proc.args, msg)
+        return proc
+
+    return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+    stdout, stderr = proc.communicate()
+    stdout = stdout.decode("utf-8")
+    stderr = stderr.decode("utf-8")
+    proc.stdout.close()
+    proc.stderr.close()
+
+    stderr = "\n" + stderr
+    if stderr[-1] == "\n":
+        stderr = stderr[:-1]
+
+    sec = log_get_sec(1)
+    log("CMD " + proc.args,
+        "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+        (proc.returncode, sec, stdout, sec, stderr,
+         sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+    if proc.returncode != 0 and fail:
+        if len(stderr) > 0 and stderr[-1] == "\n":
+            stderr = stderr[:-1]
+        raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+    if include_stderr:
+        return proc.returncode, stdout, stderr
+    else:
+        return proc.returncode, stdout
+
+def rm(f):
+    cmd("rm -f %s" % (f))
+    if f in files:
+        files.remove(f)
+
+def tool(name, args, flags, JSON=True, fail=True):
+    params = ""
+    if JSON:
+        params += "%s " % (flags["json"])
+
+    ret, out = cmd(name + " " + params + args, fail=fail)
+    if JSON and len(out.strip()) != 0:
+        return ret, json.loads(out)
+    else:
+        return ret, out
+
+def bpftool(args, JSON=True, fail=True):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+
+def bpftool_prog_list(expected=None):
+    _, progs = bpftool("prog show", JSON=True, fail=True)
+    if expected is not None:
+        if len(progs) != expected:
+            fail(True, "%d BPF programs loaded, expected %d" %
+                 (len(progs), expected))
+    return progs
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+    for i in range(n_retry):
+        nprogs = len(bpftool_prog_list())
+        if nprogs == expected:
+            return
+        time.sleep(0.05)
+    raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def ip(args, force=False, JSON=True, fail=True):
+    if force:
+        args = "-force " + args
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+
+def tc(args, JSON=True, fail=True):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+
+def ethtool(dev, opt, args, fail=True):
+    return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+    return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+    return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+    return "bytecode \"%s\"" % (bytecode)
+
+class DebugfsDir:
+    """
+    Class for accessing DebugFS directories as a dictionary.
+    """
+
+    def __init__(self, path):
+        self.path = path
+        self._dict = self._debugfs_dir_read(path)
+
+    def __len__(self):
+        return len(self._dict.keys())
+
+    def __getitem__(self, key):
+        if type(key) is int:
+            key = list(self._dict.keys())[key]
+        return self._dict[key]
+
+    def __setitem__(self, key, value):
+        log("DebugFS set %s = %s" % (key, value), "")
+        log_level_inc()
+
+        cmd("echo '%s' > %s/%s" % (value, self.path, key))
+        log_level_dec()
+
+        _, out = cmd('cat %s/%s' % (self.path, key))
+        self._dict[key] = out.strip()
+
+    def _debugfs_dir_read(self, path):
+        dfs = {}
+
+        log("DebugFS state for %s" % (path), "")
+        log_level_inc(add=2)
+
+        _, out = cmd('ls ' + path)
+        for f in out.split():
+            p = os.path.join(path, f)
+            if os.path.isfile(p):
+                _, out = cmd('cat %s/%s' % (path, f))
+                dfs[f] = out.strip()
+            elif os.path.isdir(p):
+                dfs[f] = DebugfsDir(p)
+            else:
+                raise Exception("%s is neither file nor directory" % (p))
+
+        log_level_dec()
+        log("DebugFS state", dfs)
+        log_level_dec()
+
+        return dfs
+
+class NetdevSim:
+    """
+    Class for netdevsim netdevice and its attributes.
+    """
+
+    def __init__(self):
+        self.dev = self._netdevsim_create()
+        devs.append(self)
+
+        self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+        self.dfs_refresh()
+
+    def __getitem__(self, key):
+        return self.dev[key]
+
+    def _netdevsim_create(self):
+        _, old  = ip("link show")
+        ip("link add sim%d type netdevsim")
+        _, new  = ip("link show")
+
+        for dev in new:
+            f = filter(lambda x: x["ifname"] == dev["ifname"], old)
+            if len(list(f)) == 0:
+                return dev
+
+        raise Exception("failed to create netdevsim device")
+
+    def remove(self):
+        devs.remove(self)
+        ip("link del dev %s" % (self.dev["ifname"]))
+
+    def dfs_refresh(self):
+        self.dfs = DebugfsDir(self.dfs_dir)
+        return self.dfs
+
+    def dfs_num_bound_progs(self):
+        path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+        _, progs = cmd('ls %s' % (path))
+        return len(progs.split())
+
+    def dfs_get_bound_progs(self, expected):
+        progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+        if expected is not None:
+            if len(progs) != expected:
+                fail(True, "%d BPF programs bound, expected %d" %
+                     (len(progs), expected))
+        return progs
+
+    def wait_for_flush(self, bound=0, total=0, n_retry=20):
+        for i in range(n_retry):
+            nbound = self.dfs_num_bound_progs()
+            nprogs = len(bpftool_prog_list())
+            if nbound == bound and nprogs == total:
+                return
+            time.sleep(0.05)
+        raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+    def set_mtu(self, mtu, fail=True):
+        return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+                  fail=fail)
+
+    def set_xdp(self, bpf, mode, force=False, fail=True):
+        return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+                  force=force, fail=fail)
+
+    def unset_xdp(self, mode, force=False, fail=True):
+        return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+                  force=force, fail=fail)
+
+    def ip_link_show(self, xdp):
+        _, link = ip("link show dev %s" % (self['ifname']))
+        if len(link) > 1:
+            raise Exception("Multiple objects on ip link show")
+        if len(link) < 1:
+            return {}
+        fail(xdp != "xdp" in link,
+             "XDP program not reporting in iplink (reported %s, expected %s)" %
+             ("xdp" in link, xdp))
+        return link[0]
+
+    def tc_add_ingress(self):
+        tc("qdisc add dev %s ingress" % (self['ifname']))
+
+    def tc_del_ingress(self):
+        tc("qdisc del dev %s ingress" % (self['ifname']))
+
+    def tc_flush_filters(self, bound=0, total=0):
+        self.tc_del_ingress()
+        self.tc_add_ingress()
+        self.wait_for_flush(bound=bound, total=total)
+
+    def tc_show_ingress(self, expected=None):
+        # No JSON support, oh well...
+        flags = ["skip_sw", "skip_hw", "in_hw"]
+        named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+        args = "-s filter show dev %s ingress" % (self['ifname'])
+        _, out = tc(args, JSON=False)
+
+        filters = []
+        lines = out.split('\n')
+        for line in lines:
+            words = line.split()
+            if "handle" not in words:
+                continue
+            fltr = {}
+            for flag in flags:
+                fltr[flag] = flag in words
+            for name in named:
+                try:
+                    idx = words.index(name)
+                    fltr[name] = words[idx + 1]
+                except ValueError:
+                    pass
+            filters.append(fltr)
+
+        if expected is not None:
+            fail(len(filters) != expected,
+                 "%d ingress filters loaded, expected %d" %
+                 (len(filters), expected))
+        return filters
+
+    def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
+                           fail=True):
+        params = ""
+        if da:
+            params += " da"
+        if skip_sw:
+            params += " skip_sw"
+        if skip_hw:
+            params += " skip_hw"
+        return tc("filter add dev %s ingress bpf %s %s" %
+                  (self['ifname'], bpf, params), fail=fail)
+
+    def set_ethtool_tc_offloads(self, enable, fail=True):
+        args = "hw-tc-offload %s" % ("on" if enable else "off")
+        return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+    for dev in devs:
+        dev.remove()
+    for f in files:
+        cmd("rm -f %s" % (f))
+
+def pin_prog(file_name, idx=0):
+    progs = bpftool_prog_list(expected=(idx + 1))
+    prog = progs[idx]
+    bpftool("prog pin id %d %s" % (prog["id"], file_name))
+    files.append(file_name)
+
+    return file_name, bpf_pinned(file_name)
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+    logfile = open(args.log, 'w+')
+    logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+# Check no BPF programs are loaded
+skip(len(progs) != 0, "BPF programs already loaded on the system")
+
+# Check netdevsim
+ret, out = cmd("modprobe netdevsim", fail=False)
+skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+    cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.o"]
+for s in samples:
+    ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+    skip(ret != 0, "sample %s/%s not found, please compile it" %
+         (bpf_test_dir, s))
+
+try:
+    obj = bpf_obj("sample_ret0.o")
+    bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+    start_test("Test destruction of generic XDP...")
+    sim = NetdevSim()
+    sim.set_xdp(obj, "generic")
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.tc_add_ingress()
+
+    start_test("Test TC non-offloaded...")
+    ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+
+    start_test("Test TC non-offloaded isn't getting bound...")
+    ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+
+    sim.tc_flush_filters()
+
+    start_test("Test TC offloads are off by default...")
+    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    fail(ret == 0, "TC filter loaded without enabling TC offloads")
+    sim.wait_for_flush()
+
+    sim.set_ethtool_tc_offloads(True)
+    sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+    start_test("Test TC offload by default...")
+    ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+    ingress = sim.tc_show_ingress(expected=1)
+    fltr = ingress[0]
+    fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+    sim.tc_flush_filters()
+
+    start_test("Test TC cBPF bytcode tries offload by default...")
+    ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+    ingress = sim.tc_show_ingress(expected=1)
+    fltr = ingress[0]
+    fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+    sim.tc_flush_filters()
+    sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+    start_test("Test TC cBPF unbound bytecode doesn't offload...")
+    ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False)
+    fail(ret == 0, "TC bytecode loaded for offload")
+    sim.wait_for_flush()
+
+    start_test("Test TC offloads work...")
+    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    fail(ret != 0, "TC filter did not load with TC offloads enabled")
+
+    start_test("Test TC offload basics...")
+    dfs = sim.dfs_get_bound_progs(expected=1)
+    progs = bpftool_prog_list(expected=1)
+    ingress = sim.tc_show_ingress(expected=1)
+
+    dprog = dfs[0]
+    prog = progs[0]
+    fltr = ingress[0]
+    fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+    fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+    fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+    start_test("Test TC offload is device-bound...")
+    fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+    fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+    fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+    fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+    fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+    start_test("Test disabling TC offloads is rejected while filters installed...")
+    ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+    fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+
+    start_test("Test qdisc removal frees things...")
+    sim.tc_flush_filters()
+    sim.tc_show_ingress(expected=0)
+
+    start_test("Test disabling TC offloads is OK without filters...")
+    ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+    fail(ret != 0,
+         "Driver refused to disable TC offloads without filters installed...")
+
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test destroying device gets rid of TC filters...")
+    sim.cls_bpf_add_filter(obj, skip_sw=True)
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test destroying device gets rid of XDP...")
+    sim.set_xdp(obj, "offload")
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test XDP prog reporting...")
+    sim.set_xdp(obj, "drv")
+    ipl = sim.ip_link_show(xdp=True)
+    progs = bpftool_prog_list(expected=1)
+    fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+         "Loaded program has wrong ID")
+
+    start_test("Test XDP prog replace without force...")
+    ret, _ = sim.set_xdp(obj, "drv", fail=False)
+    fail(ret == 0, "Replaced XDP program without -force")
+    sim.wait_for_flush(total=1)
+
+    start_test("Test XDP prog replace with force...")
+    ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+    fail(ret != 0, "Could not replace XDP program with -force")
+    bpftool_prog_list_wait(expected=1)
+    ipl = sim.ip_link_show(xdp=True)
+    progs = bpftool_prog_list(expected=1)
+    fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+         "Loaded program has wrong ID")
+
+    start_test("Test XDP prog replace with bad flags...")
+    ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
+    fail(ret == 0, "Replaced XDP program with a program in different mode")
+    ret, _ = sim.set_xdp(obj, "", force=True, fail=False)
+    fail(ret == 0, "Replaced XDP program with a program in different mode")
+
+    start_test("Test XDP prog remove with bad flags...")
+    ret, _ = sim.unset_xdp("offload", force=True, fail=False)
+    fail(ret == 0, "Removed program with a bad mode mode")
+    ret, _ = sim.unset_xdp("", force=True, fail=False)
+    fail(ret == 0, "Removed program with a bad mode mode")
+
+    start_test("Test MTU restrictions...")
+    ret, _ = sim.set_mtu(9000, fail=False)
+    fail(ret == 0,
+         "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+    sim.unset_xdp("drv")
+    bpftool_prog_list_wait(expected=0)
+    sim.set_mtu(9000)
+    ret, _ = sim.set_xdp(obj, "drv", fail=False)
+    fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+    sim.set_mtu(1500)
+
+    sim.wait_for_flush()
+    start_test("Test XDP offload...")
+    sim.set_xdp(obj, "offload")
+    ipl = sim.ip_link_show(xdp=True)
+    link_xdp = ipl["xdp"]["prog"]
+    progs = bpftool_prog_list(expected=1)
+    prog = progs[0]
+    fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+
+    start_test("Test XDP offload is device bound...")
+    dfs = sim.dfs_get_bound_progs(expected=1)
+    dprog = dfs[0]
+
+    fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+    fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+    fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+    fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+    fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+    start_test("Test removing XDP program many times...")
+    sim.unset_xdp("offload")
+    sim.unset_xdp("offload")
+    sim.unset_xdp("drv")
+    sim.unset_xdp("drv")
+    sim.unset_xdp("")
+    sim.unset_xdp("")
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test attempt to use a program for a wrong device...")
+    sim2 = NetdevSim()
+    sim2.set_xdp(obj, "offload")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+    ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+    fail(ret == 0, "Pinned program loaded for a different device accepted")
+    sim2.remove()
+    ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+    fail(ret == 0, "Pinned program loaded for a removed device accepted")
+    rm(pin_file)
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test mixing of TC and XDP...")
+    sim.tc_add_ingress()
+    sim.set_xdp(obj, "offload")
+    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    fail(ret == 0, "Loading TC when XDP active should fail")
+    sim.unset_xdp("offload")
+    sim.wait_for_flush()
+
+    sim.cls_bpf_add_filter(obj, skip_sw=True)
+    ret, _ = sim.set_xdp(obj, "offload", fail=False)
+    fail(ret == 0, "Loading XDP when TC active should fail")
+
+    start_test("Test binding TC from pinned...")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+    sim.tc_flush_filters(bound=1, total=1)
+    sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+    sim.tc_flush_filters(bound=1, total=1)
+
+    start_test("Test binding XDP from pinned...")
+    sim.set_xdp(obj, "offload")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+    sim.set_xdp(pinned, "offload", force=True)
+    sim.unset_xdp("offload")
+    sim.set_xdp(pinned, "offload", force=True)
+    sim.unset_xdp("offload")
+
+    start_test("Test offload of wrong type fails...")
+    ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+    fail(ret == 0, "Managed to attach XDP program to TC")
+
+    start_test("Test asking for TC offload of two filters...")
+    sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+    sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+    # The above will trigger a splat until TC cls_bpf drivers are fixed
+
+    sim.tc_flush_filters(bound=2, total=2)
+
+    start_test("Test if netdev removal waits for translation...")
+    delay_msec = 500
+    sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+    start = time.time()
+    cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+               (sim['ifname'], obj)
+    tc_proc = cmd(cmd_line, background=True, fail=False)
+    # Wait for the verifier to start
+    while sim.dfs_num_bound_progs() <= 2:
+        pass
+    sim.remove()
+    end = time.time()
+    ret, _ = cmd_result(tc_proc, fail=False)
+    time_diff = end - start
+    log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+    fail(ret == 0, "Managed to load TC filter on a unregistering device")
+    delay_sec = delay_msec * 0.001
+    fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+         (time_diff, delay_sec))
+
+    print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+    log("Clean up...", "", level=1)
+    log_level_inc()
+    clean_up()
-- 
cgit v1.2.3


From 5783ee6ec3d3323a04cc69764d57cac7bf026332 Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Tue, 21 Nov 2017 18:17:19 +1100
Subject: selftests/powerpc: Check for pthread errors in tm-unavailable

Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../testing/selftests/powerpc/tm/tm-unavailable.c  | 43 +++++++++++++++++-----
 1 file changed, 34 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 96c37f84ce54..e6a0fad2bfd0 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -15,6 +15,7 @@
  */
 
 #define _GNU_SOURCE
+#include <error.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -33,6 +34,11 @@
 #define VSX_UNA_EXCEPTION	2
 
 #define NUM_EXCEPTIONS		3
+#define err_at_line(status, errnum, format, ...) \
+	error_at_line(status, errnum,  __FILE__, __LINE__, format ##__VA_ARGS__)
+
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
 
 struct Flags {
 	int touch_fp;
@@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 	 * checking if the failure cause is the one we expect.
 	 */
 	do {
+		int rc;
+
 		/* Bind 'ping' to CPU 0, as specified in 'attr'. */
-		pthread_create(&t0, attr, ping, (void *) &flags);
-		pthread_setname_np(t0, "ping");
-		pthread_join(t0, &ret_value);
+		rc = pthread_create(&t0, attr, ping, (void *) &flags);
+		if (rc)
+			pr_err(rc, "pthread_create()");
+		rc = pthread_setname_np(t0, "ping");
+		if (rc)
+			pr_warn(rc, "pthread_setname_np");
+		rc = pthread_join(t0, &ret_value);
+		if (rc)
+			pr_err(rc, "pthread_join");
+
 		retries--;
 	} while (ret_value != NULL && retries);
 
@@ -320,7 +335,7 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 
 int main(int argc, char **argv)
 {
-	int exception; /* FP = 0, VEC = 1, VSX = 2 */
+	int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
 	pthread_t t1;
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
@@ -330,13 +345,23 @@ int main(int argc, char **argv)
 	CPU_SET(0, &cpuset);
 
 	/* Init pthread attribute. */
-	pthread_attr_init(&attr);
+	rc = pthread_attr_init(&attr);
+	if (rc)
+		pr_err(rc, "pthread_attr_init()");
 
 	/* Set CPU 0 mask into the pthread attribute. */
-	pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
-
-	pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
-	pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */
+	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	if (rc)
+		pr_err(rc, "pthread_attr_setaffinity_np()");
+
+	rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+	if (rc)
+		pr_err(rc, "pthread_create()");
+
+	/* Name it for systemtap convenience */
+	rc = pthread_setname_np(t1, "pong");
+	if (rc)
+		pr_warn(rc, "pthread_create()");
 
 	flags.result = 0;
 
-- 
cgit v1.2.3


From cdd77d3e193031cc67426cd671d8aa370f7dfee4 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 17 Nov 2017 16:23:08 -0800
Subject: nfit, libnvdimm: deprecate the generic SMART ioctl

The kernel's ND_IOCTL_SMART_THRESHOLD command is based on a payload
definition that has become broken / out-of-sync with recent versions of
the NVDIMM_FAMILY_INTEL definition. Deprecate the use of the
ND_IOCTL_SMART_THRESHOLD command in favor of the ND_CMD_CALL approach
taken by NVDIMM_FAMILY_{HPE,MSFT}, where we can manage the per-vendor
variance in userspace.

In a couple years, when the new scheme is widely deployed in userspace
packages, the ND_IOCTL_SMART_THRESHOLD support can be removed. For now
we prevent new binaries from compiling against the kernel header
definitions, but kernel still compatible with old binaries. The
libndctl.h [1] header is now the authoritative interface definition for
NVDIMM SMART.

[1]: https://github.com/pmem/ndctl
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c      | 39 ++++++++++++++---------
 tools/testing/nvdimm/test/nfit_test.h | 59 +++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 7217b2b953b5..640c02b08a50 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -440,39 +440,50 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
 	return 0;
 }
 
-static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len)
 {
-	static const struct nd_smart_payload smart_data = {
-		.flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
-			| ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
-			| ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
-		.health = ND_SMART_NON_CRITICAL_HEALTH,
-		.temperature = 23 * 16,
+	static const struct nd_intel_smart smart_data = {
+		.flags = ND_INTEL_SMART_HEALTH_VALID
+			| ND_INTEL_SMART_SPARES_VALID
+			| ND_INTEL_SMART_ALARM_VALID
+			| ND_INTEL_SMART_USED_VALID
+			| ND_INTEL_SMART_SHUTDOWN_VALID
+			| ND_INTEL_SMART_MTEMP_VALID,
+		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+		.media_temperature = 23 * 16,
+		.ctrl_temperature = 30 * 16,
+		.pmic_temperature = 40 * 16,
 		.spares = 75,
-		.alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+			| ND_INTEL_SMART_TEMP_TRIP,
+		.ait_status = 1,
 		.life_used = 5,
 		.shutdown_state = 0,
 		.vendor_size = 0,
+		.shutdown_count = 100,
 	};
 
 	if (buf_len < sizeof(*smart))
 		return -EINVAL;
-	memcpy(smart->data, &smart_data, sizeof(smart_data));
+	memcpy(smart, &smart_data, sizeof(smart_data));
 	return 0;
 }
 
-static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
+static int nfit_test_cmd_smart_threshold(
+		struct nd_intel_smart_threshold *smart_t,
 		unsigned int buf_len)
 {
-	static const struct nd_smart_threshold_payload smart_t_data = {
-		.alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
-		.temperature = 40 * 16,
+	static const struct nd_intel_smart_threshold smart_t_data = {
+		.alarm_control = ND_INTEL_SMART_SPARE_TRIP
+			| ND_INTEL_SMART_TEMP_TRIP,
+		.media_temperature = 40 * 16,
+		.ctrl_temperature = 30 * 16,
 		.spares = 5,
 	};
 
 	if (buf_len < sizeof(*smart_t))
 		return -EINVAL;
-	memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+	memcpy(smart_t, &smart_t_data, sizeof(smart_t_data));
 	return 0;
 }
 
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 113b44675a71..b85fba2856c7 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -84,6 +84,65 @@ struct nd_cmd_ars_err_inj_stat {
 	} __packed record[0];
 } __packed;
 
+#define ND_INTEL_SMART 1
+#define ND_INTEL_SMART_THRESHOLD 2
+
+#define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
+#define ND_INTEL_SMART_USED_VALID               (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID              (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID              (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID     (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID         (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID              (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID              (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID           (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID             (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP               (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP                (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP               (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+
+struct nd_intel_smart {
+	__u32 status;
+	union {
+		struct {
+			__u32 flags;
+			__u8 reserved0[4];
+			__u8 health;
+			__u8 spares;
+			__u8 life_used;
+			__u8 alarm_flags;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u32 shutdown_count;
+			__u8 ait_status;
+			__u16 pmic_temperature;
+			__u8 reserved1[8];
+			__u8 shutdown_state;
+			__u32 vendor_size;
+			__u8 vendor_data[92];
+		} __packed;
+		__u8 data[128];
+	};
+} __packed;
+
+struct nd_intel_smart_threshold {
+	__u32 status;
+	union {
+		struct {
+			__u16 alarm_control;
+			__u8 spares;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u8 reserved[1];
+		} __packed;
+		__u8 data[8];
+	};
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
 
-- 
cgit v1.2.3


From ed07c4338dd5ceadb5ffed0a5be03488ac54f5d2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 24 Nov 2017 14:32:27 -0800
Subject: tools/testing/nvdimm: smart alarm/threshold control

Allow the smart_threshold values to be changed via the 'set smart
threshold command' and trigger notifications when the thresholds are
met.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c      | 157 ++++++++++++++++++++++++----------
 tools/testing/nvdimm/test/nfit_test.h |   9 ++
 2 files changed, 122 insertions(+), 44 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 640c02b08a50..2b57254342aa 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -168,6 +168,8 @@ struct nfit_test {
 		spinlock_t lock;
 	} ars_state;
 	struct device *dimm_dev[NUM_DCR];
+	struct nd_intel_smart *smart;
+	struct nd_intel_smart_threshold *smart_threshold;
 	struct badrange badrange;
 	struct work_struct work;
 };
@@ -440,50 +442,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
 	return 0;
 }
 
-static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len)
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+		struct nd_intel_smart *smart_data)
 {
-	static const struct nd_intel_smart smart_data = {
-		.flags = ND_INTEL_SMART_HEALTH_VALID
-			| ND_INTEL_SMART_SPARES_VALID
-			| ND_INTEL_SMART_ALARM_VALID
-			| ND_INTEL_SMART_USED_VALID
-			| ND_INTEL_SMART_SHUTDOWN_VALID
-			| ND_INTEL_SMART_MTEMP_VALID,
-		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
-		.media_temperature = 23 * 16,
-		.ctrl_temperature = 30 * 16,
-		.pmic_temperature = 40 * 16,
-		.spares = 75,
-		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
-			| ND_INTEL_SMART_TEMP_TRIP,
-		.ait_status = 1,
-		.life_used = 5,
-		.shutdown_state = 0,
-		.vendor_size = 0,
-		.shutdown_count = 100,
-	};
-
 	if (buf_len < sizeof(*smart))
 		return -EINVAL;
-	memcpy(smart, &smart_data, sizeof(smart_data));
+	memcpy(smart, smart_data, sizeof(*smart));
 	return 0;
 }
 
 static int nfit_test_cmd_smart_threshold(
-		struct nd_intel_smart_threshold *smart_t,
-		unsigned int buf_len)
+		struct nd_intel_smart_threshold *out,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *smart_t)
 {
-	static const struct nd_intel_smart_threshold smart_t_data = {
-		.alarm_control = ND_INTEL_SMART_SPARE_TRIP
-			| ND_INTEL_SMART_TEMP_TRIP,
-		.media_temperature = 40 * 16,
-		.ctrl_temperature = 30 * 16,
-		.spares = 5,
-	};
-
 	if (buf_len < sizeof(*smart_t))
 		return -EINVAL;
-	memcpy(smart_t, &smart_t_data, sizeof(smart_t_data));
+	memcpy(out, smart_t, sizeof(*smart_t));
+	return 0;
+}
+
+static void smart_notify(struct device *bus_dev,
+		struct device *dimm_dev, struct nd_intel_smart *smart,
+		struct nd_intel_smart_threshold *thresh)
+{
+	dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+			__func__, thresh->alarm_control, thresh->spares,
+			smart->spares, thresh->media_temperature,
+			smart->media_temperature, thresh->ctrl_temperature,
+			smart->ctrl_temperature);
+	if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+				&& smart->spares
+				<= thresh->spares)
+			|| ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+				&& smart->media_temperature
+				>= thresh->media_temperature)
+			|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+				&& smart->ctrl_temperature
+				>= thresh->ctrl_temperature)) {
+		device_lock(bus_dev);
+		__acpi_nvdimm_notify(dimm_dev, 0x81);
+		device_unlock(bus_dev);
+	}
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+		struct nd_intel_smart_set_threshold *in,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	unsigned int size;
+
+	size = sizeof(*in) - 4;
+	if (buf_len < size)
+		return -EINVAL;
+	memcpy(thresh->data, in, size);
+	in->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
 	return 0;
 }
 
@@ -608,7 +626,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 				|| !test_bit(func, &nfit_mem->dsm_mask))
 			return -ENOTTY;
 
-		/* lookup label space for the given dimm */
+		/* lookup per-dimm data */
 		for (i = 0; i < ARRAY_SIZE(handle); i++)
 			if (__to_nfit_memdev(nfit_mem)->device_handle ==
 					handle[i])
@@ -631,14 +649,19 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 			rc = nfit_test_cmd_set_config_data(buf, buf_len,
 				t->label[i - t->dcr_idx]);
 			break;
-		case ND_CMD_SMART:
-			rc = nfit_test_cmd_smart(buf, buf_len);
+		case ND_INTEL_SMART:
+			rc = nfit_test_cmd_smart(buf, buf_len,
+					&t->smart[i - t->dcr_idx]);
+			break;
+		case ND_INTEL_SMART_THRESHOLD:
+			rc = nfit_test_cmd_smart_threshold(buf, buf_len,
+					&t->smart_threshold[i - t->dcr_idx]);
 			break;
-		case ND_CMD_SMART_THRESHOLD:
-			rc = nfit_test_cmd_smart_threshold(buf, buf_len);
-			device_lock(&t->pdev.dev);
-			__acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
-			device_unlock(&t->pdev.dev);
+		case ND_INTEL_SMART_SET_THRESHOLD:
+			rc = nfit_test_cmd_smart_set_threshold(buf, buf_len,
+					&t->smart_threshold[i - t->dcr_idx],
+					&t->smart[i - t->dcr_idx],
+					&t->pdev.dev, t->dimm_dev[i]);
 			break;
 		default:
 			return -ENOTTY;
@@ -883,6 +906,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
 	NULL,
 };
 
+static void smart_init(struct nfit_test *t)
+{
+	int i;
+	const struct nd_intel_smart_threshold smart_t_data = {
+		.alarm_control = ND_INTEL_SMART_SPARE_TRIP
+			| ND_INTEL_SMART_TEMP_TRIP,
+		.media_temperature = 40 * 16,
+		.ctrl_temperature = 30 * 16,
+		.spares = 5,
+	};
+	const struct nd_intel_smart smart_data = {
+		.flags = ND_INTEL_SMART_HEALTH_VALID
+			| ND_INTEL_SMART_SPARES_VALID
+			| ND_INTEL_SMART_ALARM_VALID
+			| ND_INTEL_SMART_USED_VALID
+			| ND_INTEL_SMART_SHUTDOWN_VALID
+			| ND_INTEL_SMART_MTEMP_VALID,
+		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+		.media_temperature = 23 * 16,
+		.ctrl_temperature = 30 * 16,
+		.pmic_temperature = 40 * 16,
+		.spares = 75,
+		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+			| ND_INTEL_SMART_TEMP_TRIP,
+		.ait_status = 1,
+		.life_used = 5,
+		.shutdown_state = 0,
+		.vendor_size = 0,
+		.shutdown_count = 100,
+	};
+
+	for (i = 0; i < t->num_dcr; i++) {
+		memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
+		memcpy(&t->smart_threshold[i], &smart_t_data,
+				sizeof(smart_t_data));
+	}
+}
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
 	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -950,6 +1011,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
 			return -ENOMEM;
 	}
 
+	smart_init(t);
 	return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -980,6 +1042,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
 	if (!t->spa_set[1])
 		return -ENOMEM;
 
+	smart_init(t);
 	return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -1653,13 +1716,14 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
-	set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 	set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
 	set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
 	set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
@@ -2065,6 +2129,11 @@ static int nfit_test_probe(struct platform_device *pdev)
 				sizeof(struct nfit_test_dcr *), GFP_KERNEL);
 		nfit_test->dcr_dma = devm_kcalloc(dev, num,
 				sizeof(dma_addr_t), GFP_KERNEL);
+		nfit_test->smart = devm_kcalloc(dev, num,
+				sizeof(struct nd_intel_smart), GFP_KERNEL);
+		nfit_test->smart_threshold = devm_kcalloc(dev, num,
+				sizeof(struct nd_intel_smart_threshold),
+				GFP_KERNEL);
 		if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
 				&& nfit_test->label_dma && nfit_test->dcr
 				&& nfit_test->dcr_dma && nfit_test->flush
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index b85fba2856c7..ba230f6f7676 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -86,6 +86,7 @@ struct nd_cmd_ars_err_inj_stat {
 
 #define ND_INTEL_SMART 1
 #define ND_INTEL_SMART_THRESHOLD 2
+#define ND_INTEL_SMART_SET_THRESHOLD 17
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -143,6 +144,14 @@ struct nd_intel_smart_threshold {
 	};
 } __packed;
 
+struct nd_intel_smart_set_threshold {
+	__u16 alarm_control;
+	__u8 spares;
+	__u16 media_temperature;
+	__u16 ctrl_temperature;
+	__u32 status;
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
 
-- 
cgit v1.2.3


From 77be4c878c72e411ad22af96b6f81dd45c26450a Mon Sep 17 00:00:00 2001
From: Julien BOIBESSOT <julien.boibessot@armadeus.com>
Date: Tue, 5 Dec 2017 18:48:14 +0100
Subject: tools/usbip: fixes build with musl libc toolchain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Indeed musl doesn't define old SIGCLD signal name but only new one SIGCHLD.
SIGCHLD is the new POSIX name for that signal so it doesn't change
anything on other libcs.

This fixes this kind of build error:

usbipd.c: In function ‘set_signal’:
usbipd.c:459:12: error: 'SIGCLD' undeclared (first use in this function)
  sigaction(SIGCLD, &act, NULL);
            ^~~~~~
usbipd.c:459:12: note: each undeclared identifier is reported only once
	for each function it appears in
Makefile:407: recipe for target 'usbipd.o' failed
make[3]: *** [usbipd.o] Error 1

Signed-off-by: Julien BOIBESSOT <julien.boibessot@armadeus.com>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/src/usbipd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c
index 009afb4a3aae..c6dad2a13c80 100644
--- a/tools/usb/usbip/src/usbipd.c
+++ b/tools/usb/usbip/src/usbipd.c
@@ -456,7 +456,7 @@ static void set_signal(void)
 	sigaction(SIGTERM, &act, NULL);
 	sigaction(SIGINT, &act, NULL);
 	act.sa_handler = SIG_IGN;
-	sigaction(SIGCLD, &act, NULL);
+	sigaction(SIGCHLD, &act, NULL);
 }
 
 static const char *pid_file;
-- 
cgit v1.2.3


From 658e85aa4ff2951f1e5163767827eaffccd51067 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 7 Dec 2017 15:00:17 -0800
Subject: tools: bpftool: harmonise Makefile and Documentation/Makefile

Several minor fixes and harmonisation items for Makefiles:

* Use the same mechanism for verbose/non-verbose output in two files
  ("$(Q)"), for all commands.
* Use calls to "QUIET_INSTALL" and equivalent in Makefile. In
  particular, use "call(descend, ...)" instead of "make -C" to run
  documentation targets.
* Add a "doc-clean" target, aligned on "doc" and "doc-install".
* Make "install" target in Makefile depend on "bpftool".
* Remove condition on DESTDIR to initialise prefix in doc Makefile.
* Remove modification of VPATH based on OUTPUT, it is unused.
* Formatting: harmonise spaces around equal signs.
* Make install path for man pages /usr/local/man instead of
  /usr/local/share/man (respects the Makefile conventions, and the
  latter is usually a symbolic link to the former anyway).
* Do not erase prefix if set by user in bpftool Makefile.
* Fix install target for bpftool: append DESTDIR to install path.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/Makefile | 22 +++++++------
 tools/bpf/bpftool/Makefile               | 53 ++++++++++++++------------------
 2 files changed, 36 insertions(+), 39 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 37292bb5ce60..71c17fab4f2f 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -4,11 +4,14 @@ include ../../../scripts/utilities.mak
 INSTALL ?= install
 RM ?= rm -f
 
-# Make the path relative to DESTDIR, not prefix
-ifndef DESTDIR
-prefix ?= /usr/local
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
 endif
-mandir ?= $(prefix)/share/man
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
 
 MAN8_RST = $(wildcard *.rst)
@@ -20,15 +23,16 @@ man: man8
 man8: $(DOC_MAN8)
 
 $(OUTPUT)%.8: %.rst
-	rst2man $< > $@
+	$(QUIET_GEN)rst2man $< > $@
 
 clean:
-	$(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8)
+	$(call QUIET_CLEAN, Documentation)
+	$(Q)$(RM) $(DOC_MAN8)
 
 install: man
-	$(call QUIET_INSTALL, Documentation-man) \
-		$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \
-		$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir);
+	$(call QUIET_INSTALL, Documentation-man)
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
+	$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
 .PHONY: man man8 clean install
 .DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index ec3052c0b004..203ae2e14fbc 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -1,25 +1,10 @@
 include ../../scripts/Makefile.include
-
 include ../../scripts/utilities.mak
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-ifneq ($(objtree),)
-#$(info Determined 'objtree' to be $(objtree))
-endif
-
-ifneq ($(OUTPUT),)
-#$(info Determined 'OUTPUT' to be $(OUTPUT))
-# Adding $(OUTPUT) as a directory to look for source files,
-# because use generated output files as sources dependency
-# for flex/bison parsers.
-VPATH += $(OUTPUT)
-export VPATH
 endif
 
 ifeq ($(V),1)
@@ -28,12 +13,12 @@ else
   Q = @
 endif
 
-BPF_DIR	= $(srctree)/tools/lib/bpf/
+BPF_DIR = $(srctree)/tools/lib/bpf/
 
 ifneq ($(OUTPUT),)
-  BPF_PATH=$(OUTPUT)
+  BPF_PATH = $(OUTPUT)
 else
-  BPF_PATH=$(BPF_DIR)
+  BPF_PATH = $(BPF_DIR)
 endif
 
 LIBBPF = $(BPF_PATH)libbpf.a
@@ -45,7 +30,7 @@ $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
 
-prefix = /usr/local
+prefix ?= /usr/local
 bash_compdir ?= /usr/share/bash-completion/completions
 
 CC = gcc
@@ -55,12 +40,15 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
+INSTALL ?= install
+RM ?= rm -f
+
 include $(wildcard *.d)
 
 all: $(OUTPUT)bpftool
 
-SRCS=$(wildcard *.c)
-OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+SRCS = $(wildcard *.c)
+OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
@@ -73,21 +61,26 @@ $(OUTPUT)%.o: %.c
 
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
-	$(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
 
-install:
-	install -m 0755 -d $(prefix)/sbin
-	install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
-	install -m 0755 -d $(bash_compdir)
-	install -m 0644 bash-completion/bpftool $(bash_compdir)
+install: $(OUTPUT)bpftool
+	$(call QUIET_INSTALL, bpftool)
+	$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin
+	$(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool
+	$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir)
+	$(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir)
 
 doc:
-	$(Q)$(MAKE) -C Documentation/
+	$(call descend,Documentation)
+
+doc-clean:
+	$(call descend,Documentation,clean)
 
 doc-install:
-	$(Q)$(MAKE) -C Documentation/ install
+	$(call descend,Documentation,install)
 
 FORCE:
 
-.PHONY: all clean FORCE install doc doc-install
+.PHONY: all FORCE clean install
+.PHONY: doc doc-clean doc-install
 .DEFAULT_GOAL := all
-- 
cgit v1.2.3


From d32442485df7633fc67245e3e614d29ac3c45dbd Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 7 Dec 2017 15:00:18 -0800
Subject: tools: bpftool: create "uninstall", "doc-uninstall" make targets

Create two targets to remove executable and documentation that would
have been previously installed with `make install` and `make
doc-install`.

Also create a "QUIET_UNINST" helper in tools/scripts/Makefile.include.

Do not attempt to remove directories /usr/local/sbin and
/usr/share/bash-completions/completions, even if they are empty, as
those specific directories probably already existed on the system before
we installed the program, and we do not wish to break other makefiles
that might assume their existence. Do remvoe /usr/local/share/man/man8
if empty however, as this directory does not seem to exist by default.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/Makefile |  8 +++++++-
 tools/bpf/bpftool/Makefile               | 12 ++++++++++--
 tools/scripts/Makefile.include           |  1 +
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 71c17fab4f2f..c462a928e03d 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -3,6 +3,7 @@ include ../../../scripts/utilities.mak
 
 INSTALL ?= install
 RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
 
 ifeq ($(V),1)
   Q =
@@ -34,5 +35,10 @@ install: man
 	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
 	$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
-.PHONY: man man8 clean install
+uninstall:
+	$(call QUIET_UNINST, Documentation-man)
+	$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
+	$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
+
+.PHONY: man man8 clean install uninstall
 .DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 203ae2e14fbc..3f17ad317512 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -70,6 +70,11 @@ install: $(OUTPUT)bpftool
 	$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir)
 	$(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir)
 
+uninstall:
+	$(call QUIET_UNINST, bpftool)
+	$(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
+	$(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
+
 doc:
 	$(call descend,Documentation)
 
@@ -79,8 +84,11 @@ doc-clean:
 doc-install:
 	$(call descend,Documentation,install)
 
+doc-uninstall:
+	$(call descend,Documentation,uninstall)
+
 FORCE:
 
-.PHONY: all FORCE clean install
-.PHONY: doc doc-clean doc-install
+.PHONY: all FORCE clean install uninstall
+.PHONY: doc doc-clean doc-install doc-uninstall
 .DEFAULT_GOAL := all
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 3fab179b1aba..fcb3ed0be5f8 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -99,5 +99,6 @@ ifneq ($(silent),1)
 
 	QUIET_CLEAN    = @printf '  CLEAN    %s\n' $1;
 	QUIET_INSTALL  = @printf '  INSTALL  %s\n' $1;
+	QUIET_UNINST   = @printf '  UNINST   %s\n' $1;
   endif
 endif
-- 
cgit v1.2.3


From f36dbfe1a504b85c7b3bf89fdd99991afbaa0f74 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Fri, 1 Sep 2017 10:17:14 +0800
Subject: selftests/powerpc: Fix build errors in powerpc ptrace selftests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 7 will take "r2" in clobber list as an error and it will get
following build errors for powerpc ptrace selftests even with -fno-pic
option:
  ptrace-tm-vsx.c: In function ‘tm_vsx’:
  ptrace-tm-vsx.c:42:2: error: PIC register clobbered by ‘r2’ in ‘asm’
    asm __volatile__(
    ^~~
  make[1]: *** [ptrace-tm-vsx] Error 1
  ptrace-tm-spd-vsx.c: In function ‘tm_spd_vsx’:
  ptrace-tm-spd-vsx.c:55:2: error: PIC register clobbered by ‘r2’ in ‘asm’
    asm __volatile__(
    ^~~
  make[1]: *** [ptrace-tm-spd-vsx] Error 1
  ptrace-tm-spr.c: In function ‘tm_spr’:
  ptrace-tm-spr.c:46:2: error: PIC register clobbered by ‘r2’ in ‘asm’
    asm __volatile__(
    ^~~

Fix the build error by removing "r2" from the clobber list. None of
these asm blocks actually clobber r2.

Reported-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c | 4 ++--
 tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c     | 3 +--
 tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c     | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 0df3c23b7888..277dade1b382 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -79,8 +79,8 @@ trans:
 		: [res] "=r" (result), [texasr] "=r" (texasr)
 		: [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
 		[sprn_texasr] "i"  (SPRN_TEXASR)
-		: "memory", "r0", "r1", "r2", "r3", "r4",
-		"r8", "r9", "r10", "r11"
+		: "memory", "r0", "r1", "r3", "r4",
+		"r7", "r8", "r9", "r10", "r11"
 		);
 
 	if (result) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 94e57cb89769..51427a2465f6 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -76,8 +76,7 @@ trans:
 		: [tfhar] "=r" (tfhar), [res] "=r" (result),
 		[texasr] "=r" (texasr), [cptr1] "=r" (cptr1)
 		: [sprn_texasr] "i"  (SPRN_TEXASR)
-		: "memory", "r0", "r1", "r2", "r3", "r4",
-		"r8", "r9", "r10", "r11", "r31"
+		: "memory", "r0", "r8", "r31"
 		);
 
 	/* There are 2 32bit instructions before tbegin. */
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index b4081e2b22d5..17c23cabac3e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -67,7 +67,7 @@ trans:
 		: [res] "=r" (result), [texasr] "=r" (texasr)
 		: [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt),
 		[sprn_texasr] "i"  (SPRN_TEXASR), [cptr1] "r" (&cptr[1])
-		: "memory", "r0", "r1", "r2", "r3", "r4",
+		: "memory", "r0", "r1", "r3", "r4",
 		"r7", "r8", "r9", "r10", "r11"
 		);
 
-- 
cgit v1.2.3


From 63060c39161d3d61c771dee20a3cbdffaf83f1df Mon Sep 17 00:00:00 2001
From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Tue, 12 Dec 2017 00:55:23 +0530
Subject: selftests: bpf: Adding config fragment CONFIG_CGROUP_BPF=y

CONFIG_CGROUP_BPF=y is required for test_dev_cgroup test case.

Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 52d53ed08769..9d4897317c77 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -3,3 +3,4 @@ CONFIG_BPF_SYSCALL=y
 CONFIG_NET_CLS_BPF=m
 CONFIG_BPF_EVENTS=y
 CONFIG_TEST_BPF=m
+CONFIG_CGROUP_BPF=y
-- 
cgit v1.2.3


From d279f1f8c64711ca986c3121c8ec811b892932f0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 11 Dec 2017 11:39:03 -0800
Subject: bpf/tracing: add a bpf test for new ioctl query interface

Added a subtest in test_progs. The tracepoint is
sched/sched_switch. Multiple bpf programs are attached to
this tracepoint and the query interface is exercised.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/perf_event.h         |  22 +++++
 tools/testing/selftests/bpf/Makefile          |   2 +-
 tools/testing/selftests/bpf/test_progs.c      | 133 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tracepoint.c |  26 +++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_tracepoint.c

(limited to 'tools')

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 362493a2f950..f2c354d5f519 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -418,6 +418,27 @@ struct perf_event_attr {
 	__u16	__reserved_2;	/* align to __u64 */
 };
 
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
+ * to query bpf programs attached to the same perf tracepoint
+ * as the given perf event.
+ */
+struct perf_event_query_bpf {
+	/*
+	 * The below ids array length
+	 */
+	__u32	ids_len;
+	/*
+	 * Set by the kernel to indicate the number of
+	 * available programs
+	 */
+	__u32	prog_cnt;
+	/*
+	 * User provided buffer to store program ids
+	 */
+	__u32	ids[0];
+};
+
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
 
 /*
@@ -433,6 +454,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
 #define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_QUERY_BPF	_IOWR('$', 10, struct perf_event_query_bpf *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2c9d8c63c6fa..255fb1f50f6b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -17,7 +17,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
-	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o
+	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 69427531408d..1d7d2149163a 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -21,8 +21,10 @@ typedef __u16 __sum16;
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/filter.h>
+#include <linux/perf_event.h>
 #include <linux/unistd.h>
 
+#include <sys/ioctl.h>
 #include <sys/wait.h>
 #include <sys/resource.h>
 #include <sys/types.h>
@@ -617,6 +619,136 @@ static void test_obj_name(void)
 	}
 }
 
+static void test_tp_attach_query(void)
+{
+	const int num_progs = 3;
+	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+	__u32 duration = 0, info_len, saved_prog_ids[num_progs];
+	const char *file = "./test_tracepoint.o";
+	struct perf_event_query_bpf *query;
+	struct perf_event_attr attr = {};
+	struct bpf_object *obj[num_progs];
+	struct bpf_prog_info prog_info;
+	char buf[256];
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		return;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+		  "read", "bytes %d errno %d\n", bytes, errno))
+		return;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+	for (i = 0; i < num_progs; i++) {
+		err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+				    &prog_fd[i]);
+		if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+			goto cleanup1;
+
+		bzero(&prog_info, sizeof(prog_info));
+		prog_info.jited_prog_len = 0;
+		prog_info.xlated_prog_len = 0;
+		prog_info.nr_map_ids = 0;
+		info_len = sizeof(prog_info);
+		err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+		if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup1;
+		saved_prog_ids[i] = prog_info.id;
+
+		pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+				    0 /* cpu 0 */, -1 /* group id */,
+				    0 /* flags */);
+		if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+			  pmu_fd[i], errno))
+			goto cleanup2;
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+		if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup3;
+
+		if (i == 0) {
+			/* check NULL prog array query */
+			query->ids_len = num_progs;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(err || query->prog_cnt != 0,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+		}
+
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+		if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup3;
+
+		if (i == 1) {
+			/* try to get # of programs only */
+			query->ids_len = 0;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(err || query->prog_cnt != 2,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+
+			/* try a few negative tests */
+			/* invalid query pointer */
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+				    (struct perf_event_query_bpf *)0x1);
+			if (CHECK(!err || errno != EFAULT,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d\n", err, errno))
+				goto cleanup3;
+
+			/* no enough space */
+			query->ids_len = 1;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+		}
+
+		query->ids_len = num_progs;
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+		if (CHECK(err || query->prog_cnt != (i + 1),
+			  "perf_event_ioc_query_bpf",
+			  "err %d errno %d query->prog_cnt %u\n",
+			  err, errno, query->prog_cnt))
+			goto cleanup3;
+		for (j = 0; j < i + 1; j++)
+			if (CHECK(saved_prog_ids[j] != query->ids[j],
+				  "perf_event_ioc_query_bpf",
+				  "#%d saved_prog_id %x query prog_id %x\n",
+				  j, saved_prog_ids[j], query->ids[j]))
+				goto cleanup3;
+	}
+
+	i = num_progs - 1;
+	for (; i >= 0; i--) {
+ cleanup3:
+		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+ cleanup2:
+		close(pmu_fd[i]);
+ cleanup1:
+		bpf_object__close(obj[i]);
+	}
+	free(query);
+}
+
 int main(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -630,6 +762,7 @@ int main(void)
 	test_bpf_obj_id();
 	test_pkt_md_access();
 	test_obj_name();
+	test_tp_attach_query();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c
new file mode 100644
index 000000000000..04bf084517e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tracepoint.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+	unsigned long long pad;
+	char prev_comm[16];
+	int prev_pid;
+	int prev_prio;
+	long long prev_state;
+	char next_comm[16];
+	int next_pid;
+	int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
-- 
cgit v1.2.3


From 965de87e54b803223bff703ea6b2a76c056695ae Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Mon, 11 Dec 2017 11:36:49 -0500
Subject: samples/bpf: add a test for bpf_override_return

This adds a basic test for bpf_override_return to verify it works.  We
override the main function for mounting a btrfs fs so it'll return
-ENOMEM and then make sure that trying to mount a btrfs fs will fail.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h            | 7 ++++++-
 tools/testing/selftests/bpf/bpf_helpers.h | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c223ab30293..cf446c25c0ec 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -677,6 +677,10 @@ union bpf_attr {
  *     @buf: buf to fill
  *     @buf_size: size of the buf
  *     Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ *	@pt_regs: pointer to struct pt_regs
+ *	@rc: the return value to set
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -736,7 +740,8 @@ union bpf_attr {
 	FN(xdp_adjust_meta),		\
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
-	FN(getsockopt),
+	FN(getsockopt),			\
+	FN(override_return),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index fd9a17fa8a8b..33cb00e46c49 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
 static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
 				       unsigned int buf_size) =
 	(void *) BPF_FUNC_perf_prog_read_value;
-
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+	(void *) BPF_FUNC_override_return;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
cgit v1.2.3


From 583c90097f7271ab90f149b52b9ac2098bf2cbb5 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 13 Dec 2017 15:18:51 +0000
Subject: libbpf: add ability to guess program type based on section name

The bpf_prog_load() function will guess program type if it's not
specified explicitly. This functionality will be used to implement
loading of different programs without asking a user to specify
the program type. In first order it will be used by bpftool.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5aa45f89da93..205b7822fa0a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1721,6 +1721,45 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
+#define BPF_PROG_SEC(string, type) { string, sizeof(string), type }
+static const struct {
+	const char *sec;
+	size_t len;
+	enum bpf_prog_type prog_type;
+} section_names[] = {
+	BPF_PROG_SEC("socket",		BPF_PROG_TYPE_SOCKET_FILTER),
+	BPF_PROG_SEC("kprobe/",		BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("kretprobe/",	BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
+	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
+	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
+	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_PROG_SEC("cgroup/sock",	BPF_PROG_TYPE_CGROUP_SOCK),
+	BPF_PROG_SEC("cgroup/dev",	BPF_PROG_TYPE_CGROUP_DEVICE),
+	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
+	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
+};
+#undef BPF_PROG_SEC
+
+static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
+{
+	int i;
+
+	if (!prog->section_name)
+		goto err;
+
+	for (i = 0; i < ARRAY_SIZE(section_names); i++)
+		if (strncmp(prog->section_name, section_names[i].sec,
+			    section_names[i].len) == 0)
+			return section_names[i].prog_type;
+
+err:
+	pr_warning("failed to guess program type based on section name %s\n",
+		   prog->section_name);
+
+	return BPF_PROG_TYPE_UNSPEC;
+}
+
 int bpf_map__fd(struct bpf_map *map)
 {
 	return map ? map->fd : -EINVAL;
@@ -1832,6 +1871,18 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		return -ENOENT;
 	}
 
+	/*
+	 * If type is not specified, try to guess it based on
+	 * section name.
+	 */
+	if (type == BPF_PROG_TYPE_UNSPEC) {
+		type = bpf_program__guess_type(prog);
+		if (type == BPF_PROG_TYPE_UNSPEC) {
+			bpf_object__close(obj);
+			return -EINVAL;
+		}
+	}
+
 	bpf_program__set_type(prog, type);
 	err = bpf_object__load(obj);
 	if (err) {
-- 
cgit v1.2.3


From fe4d44b23f6b38194a92c6b8a50d921a071c4db4 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 13 Dec 2017 15:18:52 +0000
Subject: libbpf: prefer global symbols as bpf program name source

Libbpf picks the name of the first symbol in the corresponding
elf section to use as a program name. But without taking symbol's
scope into account it may end's up with some local label
as a program name. E.g.:

$ bpftool prog
1: type 15  name LBB0_10    tag 0390a5136ba23f5c
	loaded_at Dec 07/17:22  uid 0
	xlated 456B  not jited  memlock 4096B

Fix this by preferring global symbols as program name.

For instance:
$ bpftool prog
1: type 15  name bpf_prog1  tag 0390a5136ba23f5c
	loaded_at Dec 07/17:26  uid 0
	xlated 456B  not jited  memlock 4096B

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 205b7822fa0a..65d0d0aff4fa 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -387,6 +387,8 @@ bpf_object__init_prog_names(struct bpf_object *obj)
 				continue;
 			if (sym.st_shndx != prog->idx)
 				continue;
+			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
+				continue;
 
 			name = elf_strptr(obj->efile.elf,
 					  obj->efile.strtabidx,
-- 
cgit v1.2.3


From 49a086c201a9356287471aa5846a427bdcecc4f7 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 13 Dec 2017 15:18:53 +0000
Subject: bpftool: implement prog load command

Add the prog load command to load a bpf program from a specified
binary file and pin it to bpffs.

Usage description and examples are given in the corresponding man
page.

Syntax:
$ bpftool prog load OBJ FILE

FILE is a non-existing file on bpffs.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 10 +++-
 tools/bpf/bpftool/Documentation/bpftool.rst      |  2 +-
 tools/bpf/bpftool/common.c                       | 71 +++++++++++++-----------
 tools/bpf/bpftool/main.h                         |  1 +
 tools/bpf/bpftool/prog.c                         | 29 +++++++++-
 5 files changed, 79 insertions(+), 34 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 36e8d1c3c40d..ffdb20e8280f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,7 +15,7 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **dump xlated** | **dump jited** | **pin** | **help** }
+	{ **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
 
 MAP COMMANDS
 =============
@@ -24,6 +24,7 @@ MAP COMMANDS
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
+|	**bpftool** **prog load** *OBJ* *FILE*
 |	**bpftool** **prog help**
 |
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
@@ -57,6 +58,11 @@ DESCRIPTION
 
 		  Note: *FILE* must be located in *bpffs* mount.
 
+	**bpftool prog load** *OBJ* *FILE*
+		  Load bpf program from binary *OBJ* and pin as *FILE*.
+
+		  Note: *FILE* must be located in *bpffs* mount.
+
 	**bpftool prog help**
 		  Print short help message.
 
@@ -126,8 +132,10 @@ EXAMPLES
 |
 | **# mount -t bpf none /sys/fs/bpf/**
 | **# bpftool prog pin id 10 /sys/fs/bpf/prog**
+| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2**
 | **# ls -l /sys/fs/bpf/**
 |   -rw------- 1 root root 0 Jul 22 01:43 prog
+|   -rw------- 1 root root 0 Jul 22 01:44 prog2
 
 **# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes**
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 926c03d5a8da..f547a0c0aa34 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -26,7 +26,7 @@ SYNOPSIS
 	| **pin** | **help** }
 
 	*PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin**
-	| **help** }
+	| **load** | **help** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 2bd3b280e6dd..b62c94e3997a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -163,13 +163,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
 	return fd;
 }
 
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_fd(int fd, const char *name)
 {
 	char err_str[ERR_MAX_LEN];
-	unsigned int id;
-	char *endptr;
 	char *file;
 	char *dir;
+	int err = 0;
+
+	err = bpf_obj_pin(fd, name);
+	if (!err)
+		goto out;
+
+	file = malloc(strlen(name) + 1);
+	strcpy(file, name);
+	dir = dirname(file);
+
+	if (errno != EPERM || is_bpffs(dir)) {
+		p_err("can't pin the object (%s): %s", name, strerror(errno));
+		goto out_free;
+	}
+
+	/* Attempt to mount bpffs, then retry pinning. */
+	err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
+	if (!err) {
+		err = bpf_obj_pin(fd, name);
+		if (err)
+			p_err("can't pin the object (%s): %s", name,
+			      strerror(errno));
+	} else {
+		err_str[ERR_MAX_LEN - 1] = '\0';
+		p_err("can't mount BPF file system to pin the object (%s): %s",
+		      name, err_str);
+	}
+
+out_free:
+	free(file);
+out:
+	return err;
+}
+
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+{
+	unsigned int id;
+	char *endptr;
 	int err;
 	int fd;
 
@@ -195,35 +231,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
 		return -1;
 	}
 
-	err = bpf_obj_pin(fd, *argv);
-	if (!err)
-		goto out_close;
-
-	file = malloc(strlen(*argv) + 1);
-	strcpy(file, *argv);
-	dir = dirname(file);
-
-	if (errno != EPERM || is_bpffs(dir)) {
-		p_err("can't pin the object (%s): %s", *argv, strerror(errno));
-		goto out_free;
-	}
+	err = do_pin_fd(fd, *argv);
 
-	/* Attempt to mount bpffs, then retry pinning. */
-	err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
-	if (!err) {
-		err = bpf_obj_pin(fd, *argv);
-		if (err)
-			p_err("can't pin the object (%s): %s", *argv,
-			      strerror(errno));
-	} else {
-		err_str[ERR_MAX_LEN - 1] = '\0';
-		p_err("can't mount BPF file system to pin the object (%s): %s",
-		      *argv, err_str);
-	}
-
-out_free:
-	free(file);
-out_close:
 	close(fd);
 	return err;
 }
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index bff330b49791..bec1ccbb49c7 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -111,6 +111,7 @@ char *get_fdinfo(int fd, const char *key);
 int open_obj_pinned(char *path);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index ad619b96c276..037484ceaeaf 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -45,6 +45,7 @@
 #include <sys/stat.h>
 
 #include <bpf.h>
+#include <libbpf.h>
 
 #include "main.h"
 #include "disasm.h"
@@ -635,6 +636,30 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+static int do_load(int argc, char **argv)
+{
+	struct bpf_object *obj;
+	int prog_fd;
+
+	if (argc != 2)
+		usage();
+
+	if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
+		p_err("failed to load program\n");
+		return -1;
+	}
+
+	if (do_pin_fd(prog_fd, argv[1])) {
+		p_err("failed to pin program\n");
+		return -1;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -647,13 +672,14 @@ static int do_help(int argc, char **argv)
 		"       %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
 		"       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
 		"       %s %s pin   PROG FILE\n"
+		"       %s %s load  OBJ  FILE\n"
 		"       %s %s help\n"
 		"\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
 
 	return 0;
 }
@@ -663,6 +689,7 @@ static const struct cmd cmds[] = {
 	{ "help",	do_help },
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
+	{ "load",	do_load },
 	{ 0 }
 };
 
-- 
cgit v1.2.3


From 5ccda64d38cc2800e3c7fab42a2fea46d44693e9 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 13 Dec 2017 15:18:54 +0000
Subject: bpftool: implement cgroup bpf operations

This patch adds basic cgroup bpf operations to bpftool:
cgroup list, attach and detach commands.

Usage is described in the corresponding man pages,
and examples are provided.

Syntax:
$ bpftool cgroup list CGROUP
$ bpftool cgroup attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]
$ bpftool cgroup detach CGROUP ATTACH_TYPE PROG

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 118 ++++++++
 tools/bpf/bpftool/Documentation/bpftool-map.rst    |   2 +-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   |   2 +-
 tools/bpf/bpftool/Documentation/bpftool.rst        |   6 +-
 tools/bpf/bpftool/cgroup.c                         | 307 +++++++++++++++++++++
 tools/bpf/bpftool/main.c                           |   3 +-
 tools/bpf/bpftool/main.h                           |   1 +
 7 files changed, 434 insertions(+), 5 deletions(-)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
 create mode 100644 tools/bpf/bpftool/cgroup.c

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
new file mode 100644
index 000000000000..45c71b1f682b
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -0,0 +1,118 @@
+================
+bpftool-cgroup
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **cgroup** *COMMAND*
+
+	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+	*COMMANDS* :=
+	{ **list** | **attach** | **detach** | **help** }
+
+MAP COMMANDS
+=============
+
+|	**bpftool** **cgroup list** *CGROUP*
+|	**bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+|	**bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+|	**bpftool** **cgroup help**
+|
+|	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+|	*ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* }
+|	*ATTACH_FLAGS* := { *multi* | *override* }
+
+DESCRIPTION
+===========
+	**bpftool cgroup list** *CGROUP*
+		  List all programs attached to the cgroup *CGROUP*.
+
+		  Output will start with program ID followed by attach type,
+		  attach flags and program name.
+
+	**bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+		  Attach program *PROG* to the cgroup *CGROUP* with attach type
+		  *ATTACH_TYPE* and optional *ATTACH_FLAGS*.
+
+		  *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs
+		  some bpf program, the program in this cgroup yields to sub-cgroup
+		  program; **multi** if a sub-cgroup installs some bpf program,
+		  that cgroup program gets run in addition to the program in this
+		  cgroup.
+
+		  Only one program is allowed to be attached to a cgroup with
+		  no attach flags or the **override** flag. Attaching another
+		  program will release old program and attach the new one.
+
+		  Multiple programs are allowed to be attached to a cgroup with
+		  **multi**. They are executed in FIFO order (those that were
+		  attached first, run first).
+
+		  Non-default *ATTACH_FLAGS* are supported by kernel version 4.14
+		  and later.
+
+		  *ATTACH_TYPE* can be on of:
+		  **ingress** ingress path of the inet socket (since 4.10);
+		  **egress** egress path of the inet socket (since 4.10);
+		  **sock_create** opening of an inet socket (since 4.10);
+		  **sock_ops** various socket operations (since 4.12);
+		  **device** device access (since 4.15).
+
+	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+		  Detach *PROG* from the cgroup *CGROUP* and attach type
+		  *ATTACH_TYPE*.
+
+	**bpftool prog help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+	-f, --bpffs
+		  Show file names of pinned programs.
+
+EXAMPLES
+========
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# mkdir /sys/fs/cgroup/test.slice**
+| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog**
+| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi**
+
+**# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+    ID       AttachType      AttachFlags     Name
+    1        device          allow_multi     bpf_prog1
+
+|
+| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1**
+| **# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+    ID       AttachType      AttachFlags     Name
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 9f51a268eb06..421cabc417e6 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -128,4 +128,4 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8)
+	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index ffdb20e8280f..81c97c0e9b67 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -155,4 +155,4 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-map**\ (8)
+	**bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index f547a0c0aa34..6732a5a617e4 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
 
 	**bpftool** **version**
 
-	*OBJECT* := { **map** | **program** }
+	*OBJECT* := { **map** | **program** | **cgroup** }
 
 	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -28,6 +28,8 @@ SYNOPSIS
 	*PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin**
 	| **load** | **help** }
 
+	*CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** }
+
 DESCRIPTION
 ===========
 	*bpftool* allows for inspection and simple modification of BPF objects
@@ -53,4 +55,4 @@ OPTIONS
 
 SEE ALSO
 ========
-	**bpftool-map**\ (8), **bpftool-prog**\ (8)
+	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
new file mode 100644
index 000000000000..34ca303d72bc
--- /dev/null
+++ b/tools/bpf/bpftool/cgroup.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2017 Facebook
+// Author: Roman Gushchin <guro@fb.com>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+#define HELP_SPEC_ATTACH_FLAGS						\
+	"ATTACH_FLAGS := { multi | override }"
+
+#define HELP_SPEC_ATTACH_TYPES						\
+	"ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+
+static const char * const attach_type_strings[] = {
+	[BPF_CGROUP_INET_INGRESS] = "ingress",
+	[BPF_CGROUP_INET_EGRESS] = "egress",
+	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
+	[BPF_CGROUP_DEVICE] = "device",
+	[__MAX_BPF_ATTACH_TYPE] = NULL,
+};
+
+static enum bpf_attach_type parse_attach_type(const char *str)
+{
+	enum bpf_attach_type type;
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		if (attach_type_strings[type] &&
+		    is_prefix(str, attach_type_strings[type]))
+			return type;
+	}
+
+	return __MAX_BPF_ATTACH_TYPE;
+}
+
+static int list_bpf_prog(int id, const char *attach_type_str,
+			 const char *attach_flags_str)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	int prog_fd;
+
+	prog_fd = bpf_prog_get_fd_by_id(id);
+	if (prog_fd < 0)
+		return -1;
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+		close(prog_fd);
+		return -1;
+	}
+
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_uint_field(json_wtr, "id", info.id);
+		jsonw_string_field(json_wtr, "attach_type",
+				   attach_type_str);
+		jsonw_string_field(json_wtr, "attach_flags",
+				   attach_flags_str);
+		jsonw_string_field(json_wtr, "name", info.name);
+		jsonw_end_object(json_wtr);
+	} else {
+		printf("%-8u %-15s %-15s %-15s\n", info.id,
+		       attach_type_str,
+		       attach_flags_str,
+		       info.name);
+	}
+
+	close(prog_fd);
+	return 0;
+}
+
+static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+{
+	__u32 prog_ids[1024] = {0};
+	char *attach_flags_str;
+	__u32 prog_cnt, iter;
+	__u32 attach_flags;
+	char buf[32];
+	int ret;
+
+	prog_cnt = ARRAY_SIZE(prog_ids);
+	ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids,
+			     &prog_cnt);
+	if (ret)
+		return ret;
+
+	if (prog_cnt == 0)
+		return 0;
+
+	switch (attach_flags) {
+	case BPF_F_ALLOW_MULTI:
+		attach_flags_str = "multi";
+		break;
+	case BPF_F_ALLOW_OVERRIDE:
+		attach_flags_str = "override";
+		break;
+	case 0:
+		attach_flags_str = "";
+		break;
+	default:
+		snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+		attach_flags_str = buf;
+	}
+
+	for (iter = 0; iter < prog_cnt; iter++)
+		list_bpf_prog(prog_ids[iter], attach_type_strings[type],
+			      attach_flags_str);
+
+	return 0;
+}
+
+static int do_list(int argc, char **argv)
+{
+	enum bpf_attach_type type;
+	int cgroup_fd;
+	int ret = -1;
+
+	if (argc < 1) {
+		p_err("too few parameters for cgroup list\n");
+		goto exit;
+	} else if (argc > 1) {
+		p_err("too many parameters for cgroup list\n");
+		goto exit;
+	}
+
+	cgroup_fd = open(argv[0], O_RDONLY);
+	if (cgroup_fd < 0) {
+		p_err("can't open cgroup %s\n", argv[1]);
+		goto exit;
+	}
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	else
+		printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
+		       "AttachFlags", "Name");
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		/*
+		 * Not all attach types may be supported, so it's expected,
+		 * that some requests will fail.
+		 * If we were able to get the list for at least one
+		 * attach type, let's return 0.
+		 */
+		if (list_attached_bpf_progs(cgroup_fd, type) == 0)
+			ret = 0;
+	}
+
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	close(cgroup_fd);
+exit:
+	return ret;
+}
+
+static int do_attach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int cgroup_fd, prog_fd;
+	int attach_flags = 0;
+	int ret = -1;
+	int i;
+
+	if (argc < 4) {
+		p_err("too few parameters for cgroup attach\n");
+		goto exit;
+	}
+
+	cgroup_fd = open(argv[0], O_RDONLY);
+	if (cgroup_fd < 0) {
+		p_err("can't open cgroup %s\n", argv[1]);
+		goto exit;
+	}
+
+	attach_type = parse_attach_type(argv[1]);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type\n");
+		goto exit_cgroup;
+	}
+
+	argc -= 2;
+	argv = &argv[2];
+	prog_fd = prog_parse_fd(&argc, &argv);
+	if (prog_fd < 0)
+		goto exit_cgroup;
+
+	for (i = 0; i < argc; i++) {
+		if (is_prefix(argv[i], "multi")) {
+			attach_flags |= BPF_F_ALLOW_MULTI;
+		} else if (is_prefix(argv[i], "override")) {
+			attach_flags |= BPF_F_ALLOW_OVERRIDE;
+		} else {
+			p_err("unknown option: %s\n", argv[i]);
+			goto exit_cgroup;
+		}
+	}
+
+	if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) {
+		p_err("failed to attach program");
+		goto exit_prog;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	ret = 0;
+
+exit_prog:
+	close(prog_fd);
+exit_cgroup:
+	close(cgroup_fd);
+exit:
+	return ret;
+}
+
+static int do_detach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int prog_fd, cgroup_fd;
+	int ret = -1;
+
+	if (argc < 4) {
+		p_err("too few parameters for cgroup detach\n");
+		goto exit;
+	}
+
+	cgroup_fd = open(argv[0], O_RDONLY);
+	if (cgroup_fd < 0) {
+		p_err("can't open cgroup %s\n", argv[1]);
+		goto exit;
+	}
+
+	attach_type = parse_attach_type(argv[1]);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		goto exit_cgroup;
+	}
+
+	argc -= 2;
+	argv = &argv[2];
+	prog_fd = prog_parse_fd(&argc, &argv);
+	if (prog_fd < 0)
+		goto exit_cgroup;
+
+	if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) {
+		p_err("failed to detach program");
+		goto exit_prog;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	ret = 0;
+
+exit_prog:
+	close(prog_fd);
+exit_cgroup:
+	close(cgroup_fd);
+exit:
+	return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s %s list CGROUP\n"
+		"       %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
+		"       %s %s help\n"
+		"\n"
+		"       " HELP_SPEC_ATTACH_TYPES "\n"
+		"       " HELP_SPEC_ATTACH_FLAGS "\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"       " HELP_SPEC_OPTIONS "\n"
+		"",
+		bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "list",	do_list },
+	{ "attach",	do_attach },
+	{ "detach",	do_detach },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_cgroup(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d294bc8168be..ecd53ccf1239 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -85,7 +85,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map }\n"
+		"       OBJECT := { prog | map | cgroup }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -173,6 +173,7 @@ static const struct cmd cmds[] = {
 	{ "batch",	do_batch },
 	{ "prog",	do_prog },
 	{ "map",	do_map },
+	{ "cgroup",	do_cgroup },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index bec1ccbb49c7..8f6d3cac0347 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -115,6 +115,7 @@ int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
+int do_cgroup(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 
-- 
cgit v1.2.3


From a7ff3eca95a5f9bc24132b5975f40dac10710ed1 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 14 Dec 2017 17:55:07 -0800
Subject: selftests/bpf: add verifier tests for bpf_call

Add extensive set of tests for bpf_call verification logic:

calls: basic sanity
calls: using r0 returned by callee
calls: callee is using r1
calls: callee using args1
calls: callee using wrong args2
calls: callee using two args
calls: callee changing pkt pointers
calls: two calls with args
calls: two calls with bad jump
calls: recursive call. test1
calls: recursive call. test2
calls: unreachable code
calls: invalid call
calls: jumping across function bodies. test1
calls: jumping across function bodies. test2
calls: call without exit
calls: call into middle of ld_imm64
calls: call into middle of other call
calls: two calls with bad fallthrough
calls: two calls with stack read
calls: two calls with stack write
calls: spill into caller stack frame
calls: two calls with stack write and void return
calls: ambiguous return value
calls: two calls that return map_value
calls: two calls that return map_value with bool condition
calls: two calls that return map_value with incorrect bool check
calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1
calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2
calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3
calls: two calls that receive map_value_ptr_or_null via arg. test1
calls: two calls that receive map_value_ptr_or_null via arg. test2
calls: pkt_ptr spill into caller stack

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 961 +++++++++++++++++++++++++++-
 1 file changed, 960 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63c..88f389c6ec48 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2,6 +2,7 @@
  * Testsuite for eBPF verifier
  *
  * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2017 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -277,7 +278,7 @@ static struct bpf_test tests[] = {
 		.insns = {
 			BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
 		},
-		.errstr = "jump out of range",
+		.errstr = "not an exit",
 		.result = REJECT,
 	},
 	{
@@ -8097,6 +8098,964 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
 	},
+	{
+		"calls: basic sanity",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: using r0 returned by callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: callee is using r1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: callee using args1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: callee using wrong args2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"calls: callee using two args",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, len)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+				    offsetof(struct __sk_buff, len)),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: callee changing pkt pointers",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			/* clear_all_pkt_pointers() has to walk all frames
+			 * to make sure that pkt pointers in the caller
+			 * are cleared when callee is calling a helper that
+			 * adjusts packet size
+			 */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_xdp_adjust_head),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R6 invalid mem access 'inv'",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"calls: two calls with args",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls with bad jump",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range from insn 11 to 9",
+		.result = REJECT,
+	},
+	{
+		"calls: recursive call. test1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"calls: recursive call. test2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"calls: unreachable code",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "unreachable insn 6",
+		.result = REJECT,
+	},
+	{
+		"calls: invalid call",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "invalid destination",
+		.result = REJECT,
+	},
+	{
+		"calls: jumping across function bodies. test1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: jumping across function bodies. test2",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: call without exit",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "not an exit",
+		.result = REJECT,
+	},
+	{
+		"calls: call into middle of ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn",
+		.result = REJECT,
+	},
+	{
+		"calls: call into middle of other call",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with bad fallthrough",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "not an exit",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with stack read",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls with stack write",
+		.insns = {
+			/* main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+			/* write into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* read from stack frame of main prog */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: spill into caller stack frame",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "cannot spill",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with stack write and void return",
+		.insns = {
+			/* main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* write into stack frame of main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+			BPF_EXIT_INSN(), /* void return */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: ambiguous return value",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls that return map_value",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that return map_value with bool condition",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(), /* return 1 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that return map_value with incorrect bool check",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(), /* return 1 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = REJECT,
+		.errstr = "invalid read from stack off -16+0 size 8",
+	},
+	{
+		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+	},
+	{
+		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),  // 26
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34
+			BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+	},
+	{
+		"calls: two calls that receive map_value_ptr_or_null via arg. test1",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that receive map_value_ptr_or_null via arg. test2",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 0 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+	},
+	{
+		"calls: pkt_ptr spill into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From d98588cef04529aa326c6cbc0cfa01a3a3e00ef5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 14 Dec 2017 17:55:09 -0800
Subject: selftests/bpf: add tests for stack_zero tracking

adjust two tests, since verifier got smarter
and add new one to test stack_zero logic

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 66 ++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 88f389c6ec48..eaf294822a8f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5649,7 +5649,7 @@ static struct bpf_test tests[] = {
 		"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 0),
-			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -5884,7 +5884,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
-			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
@@ -9056,6 +9056,68 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"calls: caller stack init to zero or map_value_or_null",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			/* fetch map_value_or_null or const_zero from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* store into map_value */
+			BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* if (ctx == 0) return; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+			/* else bpf_map_lookup() and *(fp - 8) = r0 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 13 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"calls: stack init to zero and pruning",
+		.insns = {
+			/* first make allocated_stack 16 byte */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+			/* now fork the execution such that the false branch
+			 * of JGT insn will be verified second and it skisp zero
+			 * init of fp-8 stack slot. If stack liveness marking
+			 * is missing live_read marks from call map_lookup
+			 * processing then pruning will incorrectly assume
+			 * that fp-8 stack slot was unused in the fall-through
+			 * branch and will accept the program incorrectly
+			 */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 6 },
+		.errstr = "invalid indirect read from stack off -8+0 size 8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 48cca7e44f9f8268fdcd4351e2f19ff2275119d1 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 14 Dec 2017 17:55:10 -0800
Subject: libbpf: add support for bpf_call

- recognize relocation emitted by llvm
- since all regular function will be kept in .text section and llvm
  takes care of pc-relative offsets in bpf_call instruction
  simply copy all of .text to relevant program section while adjusting
  bpf_call instructions in program section to point to newly copied
  body of instructions from .text
- do so for all programs in the elf file
- set all programs types to the one passed to bpf_prog_load()

Note for elf files with multiple programs that use different
functions in .text section we need to do 'linker' style logic.
This work is still TBD

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h |   6 ++
 tools/lib/bpf/bpf.h            |   2 +-
 tools/lib/bpf/libbpf.c         | 170 ++++++++++++++++++++++++++++++-----------
 3 files changed, 134 insertions(+), 44 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index cf446c25c0ec..db1b0923a308 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -197,8 +197,14 @@ enum bpf_attach_type {
  */
 #define BPF_F_STRICT_ALIGNMENT	(1U << 0)
 
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
 #define BPF_PSEUDO_MAP_FD	1
 
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL		1
+
 /* flags for BPF_MAP_UPDATE_ELEM command */
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6534889e2b2f..9f44c196931e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
 			  __u32 map_flags);
 
 /* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE 65536
+#define BPF_LOG_BUF_SIZE (256 * 1024)
 int bpf_load_program_name(enum bpf_prog_type type, const char *name,
 			  const struct bpf_insn *insns,
 			  size_t insns_cnt, const char *license,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 65d0d0aff4fa..5b83875b3594 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -174,12 +174,19 @@ struct bpf_program {
 	char *name;
 	char *section_name;
 	struct bpf_insn *insns;
-	size_t insns_cnt;
+	size_t insns_cnt, main_prog_cnt;
 	enum bpf_prog_type type;
 
-	struct {
+	struct reloc_desc {
+		enum {
+			RELO_LD64,
+			RELO_CALL,
+		} type;
 		int insn_idx;
-		int map_idx;
+		union {
+			int map_idx;
+			int text_off;
+		};
 	} *reloc_desc;
 	int nr_reloc;
 
@@ -234,6 +241,7 @@ struct bpf_object {
 		} *reloc;
 		int nr_reloc;
 		int maps_shndx;
+		int text_shndx;
 	} efile;
 	/*
 	 * All loaded bpf_object is linked in a list, which is
@@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj)
 	size_t pi, si;
 
 	for (pi = 0; pi < obj->nr_programs; pi++) {
-		char *name = NULL;
+		const char *name = NULL;
 
 		prog = &obj->programs[pi];
+		if (prog->idx == obj->efile.text_shndx) {
+			name = ".text";
+			goto skip_search;
+		}
 
 		for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
 		     si++) {
@@ -405,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj)
 				   prog->section_name);
 			return -EINVAL;
 		}
-
+skip_search:
 		prog->name = strdup(name);
 		if (!prog->name) {
 			pr_warning("failed to allocate memory for prog sym %s\n",
@@ -795,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		} else if ((sh.sh_type == SHT_PROGBITS) &&
 			   (sh.sh_flags & SHF_EXECINSTR) &&
 			   (data->d_size > 0)) {
+			if (strcmp(name, ".text") == 0)
+				obj->efile.text_shndx = idx;
 			err = bpf_object__add_program(obj, data->d_buf,
 						      data->d_size, name, idx);
 			if (err) {
@@ -856,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 }
 
 static int
-bpf_program__collect_reloc(struct bpf_program *prog,
-			   size_t nr_maps, GElf_Shdr *shdr,
-			   Elf_Data *data, Elf_Data *symbols,
-			   int maps_shndx, struct bpf_map *maps)
+bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
+			   Elf_Data *data, struct bpf_object *obj)
 {
+	Elf_Data *symbols = obj->efile.symbols;
+	int text_shndx = obj->efile.text_shndx;
+	int maps_shndx = obj->efile.maps_shndx;
+	struct bpf_map *maps = obj->maps;
+	size_t nr_maps = obj->nr_maps;
 	int i, nrels;
 
 	pr_debug("collecting relocating info for: '%s'\n",
@@ -893,8 +910,10 @@ bpf_program__collect_reloc(struct bpf_program *prog,
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
 		}
+		pr_debug("relo for %ld value %ld name %d\n",
+			 rel.r_info >> 32, sym.st_value, sym.st_name);
 
-		if (sym.st_shndx != maps_shndx) {
+		if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
 			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
 				   prog->section_name, sym.st_shndx);
 			return -LIBBPF_ERRNO__RELOC;
@@ -903,6 +922,17 @@ bpf_program__collect_reloc(struct bpf_program *prog,
 		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
 		pr_debug("relocation: insn_idx=%u\n", insn_idx);
 
+		if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
+			if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
+				pr_warning("incorrect bpf_call opcode\n");
+				return -LIBBPF_ERRNO__RELOC;
+			}
+			prog->reloc_desc[i].type = RELO_CALL;
+			prog->reloc_desc[i].insn_idx = insn_idx;
+			prog->reloc_desc[i].text_off = sym.st_value;
+			continue;
+		}
+
 		if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
 			pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
 				   insn_idx, insns[insn_idx].code);
@@ -924,6 +954,7 @@ bpf_program__collect_reloc(struct bpf_program *prog,
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
+		prog->reloc_desc[i].type = RELO_LD64;
 		prog->reloc_desc[i].insn_idx = insn_idx;
 		prog->reloc_desc[i].map_idx = map_idx;
 	}
@@ -962,28 +993,77 @@ bpf_object__create_maps(struct bpf_object *obj)
 	return 0;
 }
 
+static int
+bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
+			struct reloc_desc *relo)
+{
+	struct bpf_insn *insn, *new_insn;
+	struct bpf_program *text;
+	size_t new_cnt;
+
+	if (relo->type != RELO_CALL)
+		return -LIBBPF_ERRNO__RELOC;
+
+	if (prog->idx == obj->efile.text_shndx) {
+		pr_warning("relo in .text insn %d into off %d\n",
+			   relo->insn_idx, relo->text_off);
+		return -LIBBPF_ERRNO__RELOC;
+	}
+
+	if (prog->main_prog_cnt == 0) {
+		text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
+		if (!text) {
+			pr_warning("no .text section found yet relo into text exist\n");
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		new_cnt = prog->insns_cnt + text->insns_cnt;
+		new_insn = realloc(prog->insns, new_cnt * sizeof(*insn));
+		if (!new_insn) {
+			pr_warning("oom in prog realloc\n");
+			return -ENOMEM;
+		}
+		memcpy(new_insn + prog->insns_cnt, text->insns,
+		       text->insns_cnt * sizeof(*insn));
+		prog->insns = new_insn;
+		prog->main_prog_cnt = prog->insns_cnt;
+		prog->insns_cnt = new_cnt;
+	}
+	insn = &prog->insns[relo->insn_idx];
+	insn->imm += prog->main_prog_cnt - relo->insn_idx;
+	pr_debug("added %zd insn from %s to prog %s\n",
+		 text->insns_cnt, text->section_name, prog->section_name);
+	return 0;
+}
+
 static int
 bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 {
-	int i;
+	int i, err;
 
 	if (!prog || !prog->reloc_desc)
 		return 0;
 
 	for (i = 0; i < prog->nr_reloc; i++) {
-		int insn_idx, map_idx;
-		struct bpf_insn *insns = prog->insns;
+		if (prog->reloc_desc[i].type == RELO_LD64) {
+			struct bpf_insn *insns = prog->insns;
+			int insn_idx, map_idx;
 
-		insn_idx = prog->reloc_desc[i].insn_idx;
-		map_idx = prog->reloc_desc[i].map_idx;
+			insn_idx = prog->reloc_desc[i].insn_idx;
+			map_idx = prog->reloc_desc[i].map_idx;
 
-		if (insn_idx >= (int)prog->insns_cnt) {
-			pr_warning("relocation out of range: '%s'\n",
-				   prog->section_name);
-			return -LIBBPF_ERRNO__RELOC;
+			if (insn_idx >= (int)prog->insns_cnt) {
+				pr_warning("relocation out of range: '%s'\n",
+					   prog->section_name);
+				return -LIBBPF_ERRNO__RELOC;
+			}
+			insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+			insns[insn_idx].imm = obj->maps[map_idx].fd;
+		} else {
+			err = bpf_program__reloc_text(prog, obj,
+						      &prog->reloc_desc[i]);
+			if (err)
+				return err;
 		}
-		insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
-		insns[insn_idx].imm = obj->maps[map_idx].fd;
 	}
 
 	zfree(&prog->reloc_desc);
@@ -1026,7 +1106,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 		Elf_Data *data = obj->efile.reloc[i].data;
 		int idx = shdr->sh_info;
 		struct bpf_program *prog;
-		size_t nr_maps = obj->nr_maps;
 
 		if (shdr->sh_type != SHT_REL) {
 			pr_warning("internal error at %d\n", __LINE__);
@@ -1040,11 +1119,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-		err = bpf_program__collect_reloc(prog, nr_maps,
+		err = bpf_program__collect_reloc(prog,
 						 shdr, data,
-						 obj->efile.symbols,
-						 obj->efile.maps_shndx,
-						 obj->maps);
+						 obj);
 		if (err)
 			return err;
 	}
@@ -1197,6 +1274,8 @@ bpf_object__load_progs(struct bpf_object *obj)
 	int err;
 
 	for (i = 0; i < obj->nr_programs; i++) {
+		if (obj->programs[i].idx == obj->efile.text_shndx)
+			continue;
 		err = bpf_program__load(&obj->programs[i],
 					obj->license,
 					obj->kern_version);
@@ -1859,7 +1938,7 @@ long libbpf_get_error(const void *ptr)
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		  struct bpf_object **pobj, int *prog_fd)
 {
-	struct bpf_program *prog;
+	struct bpf_program *prog, *first_prog = NULL;
 	struct bpf_object *obj;
 	int err;
 
@@ -1867,25 +1946,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 	if (IS_ERR(obj))
 		return -ENOENT;
 
-	prog = bpf_program__next(NULL, obj);
-	if (!prog) {
-		bpf_object__close(obj);
-		return -ENOENT;
-	}
-
-	/*
-	 * If type is not specified, try to guess it based on
-	 * section name.
-	 */
-	if (type == BPF_PROG_TYPE_UNSPEC) {
-		type = bpf_program__guess_type(prog);
+	bpf_object__for_each_program(prog, obj) {
+		/*
+		 * If type is not specified, try to guess it based on
+		 * section name.
+		 */
 		if (type == BPF_PROG_TYPE_UNSPEC) {
-			bpf_object__close(obj);
-			return -EINVAL;
+			type = bpf_program__guess_type(prog);
+			if (type == BPF_PROG_TYPE_UNSPEC) {
+				bpf_object__close(obj);
+				return -EINVAL;
+			}
 		}
+
+		bpf_program__set_type(prog, type);
+		if (prog->idx != obj->efile.text_shndx && !first_prog)
+			first_prog = prog;
+	}
+
+	if (!first_prog) {
+		pr_warning("object file doesn't contain bpf program\n");
+		bpf_object__close(obj);
+		return -ENOENT;
 	}
 
-	bpf_program__set_type(prog, type);
 	err = bpf_object__load(obj);
 	if (err) {
 		bpf_object__close(obj);
@@ -1893,6 +1977,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 	}
 
 	*pobj = obj;
-	*prog_fd = bpf_program__fd(prog);
+	*prog_fd = bpf_program__fd(first_prog);
 	return 0;
 }
-- 
cgit v1.2.3


From 3bc35c63cb70466c78d3972ceaf8205aa463a192 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 14 Dec 2017 17:55:11 -0800
Subject: selftests/bpf: add bpf_call test

strip always_inline from test_l4lb.c and compile it with -fno-inline
to let verifier go through 11 function with various function arguments
and return values

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile             |  11 +-
 tools/testing/selftests/bpf/test_l4lb_noinline.c | 473 +++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c         |  14 +-
 3 files changed, 492 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_l4lb_noinline.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 255fb1f50f6b..6970d073df5b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -17,7 +17,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
-	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o
+	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+	test_l4lb_noinline.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
@@ -49,8 +50,12 @@ else
   CPU ?= generic
 endif
 
+CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+
 %.o: %.c
-	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
-		 -Wno-compare-distinct-pointer-types          \
+	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
 	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 000000000000..ba44a14e6dc4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct vip_stats {
+	__u64 bytes;
+	__u64 pkts;
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[ETH_ALEN];
+	unsigned char eth_source[ETH_ALEN];
+	unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct vip_stats),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = CTL_MAP_SIZE,
+};
+
+static __u32 get_packet_hash(struct packet_description *pckt,
+			     bool ipv6)
+{
+	if (ipv6)
+		return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+				    pckt->ports, CH_RINGS_SIZE);
+	else
+		return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static bool get_packet_dst(struct real_definition **real,
+			   struct packet_description *pckt,
+			   struct vip_meta *vip_info,
+			   bool is_ipv6)
+{
+	__u32 hash = get_packet_hash(pckt, is_ipv6);
+	__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+	__u32 *real_pos;
+
+	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
+	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return 0;
+
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return false;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return false;
+	return true;
+}
+
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+			struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+		return TC_ACT_OK;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return TC_ACT_SHOT;
+	pckt->proto = ip6h->nexthdr;
+	pckt->flags |= F_ICMP;
+	memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+	memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+	return TC_ACT_UNSPEC;
+}
+
+static int parse_icmp(void *data, void *data_end, __u64 off,
+		      struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr->code != ICMP_FRAG_NEEDED)
+		return TC_ACT_OK;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (iph->ihl != 5)
+		return TC_ACT_SHOT;
+	pckt->proto = iph->protocol;
+	pckt->flags |= F_ICMP;
+	pckt->src = iph->daddr;
+	pckt->dst = iph->saddr;
+	return TC_ACT_UNSPEC;
+}
+
+static bool parse_udp(void *data, __u64 off, void *data_end,
+		      struct packet_description *pckt)
+{
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = udp->source;
+		pckt->port16[1] = udp->dest;
+	} else {
+		pckt->port16[0] = udp->dest;
+		pckt->port16[1] = udp->source;
+	}
+	return true;
+}
+
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+		      struct packet_description *pckt)
+{
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return false;
+
+	if (tcp->syn)
+		pckt->flags |= F_SYN_SET;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = tcp->source;
+		pckt->port16[1] = tcp->dest;
+	} else {
+		pckt->port16[0] = tcp->dest;
+		pckt->port16[1] = tcp->source;
+	}
+	return true;
+}
+
+static int process_packet(void *data, __u64 off, void *data_end,
+			  bool is_ipv6, struct __sk_buff *skb)
+{
+	void *pkt_start = (void *)(long)skb->data;
+	struct packet_description pckt = {};
+	struct eth_hdr *eth = pkt_start;
+	struct bpf_tunnel_key tkey = {};
+	struct vip_stats *data_stats;
+	struct real_definition *dst;
+	struct vip_meta *vip_info;
+	struct ctl_value *cval;
+	__u32 v4_intf_pos = 1;
+	__u32 v6_intf_pos = 2;
+	struct ipv6hdr *ip6h;
+	struct vip vip = {};
+	struct iphdr *iph;
+	int tun_flag = 0;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u32 ifindex;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	tkey.tunnel_ttl = 64;
+	if (is_ipv6) {
+		ip6h = data + off;
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+
+		iph_len = sizeof(struct ipv6hdr);
+		protocol = ip6h->nexthdr;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(ip6h->payload_len);
+		off += iph_len;
+		if (protocol == IPPROTO_FRAGMENT) {
+			return TC_ACT_SHOT;
+		} else if (protocol == IPPROTO_ICMPV6) {
+			action = parse_icmpv6(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV6_PLUS_ICMP_HDR;
+		} else {
+			memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+			memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+		}
+	} else {
+		iph = data + off;
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		if (iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		protocol = iph->protocol;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(iph->tot_len);
+		off += IPV4_HDR_LEN_NO_OPT;
+
+		if (iph->frag_off & PCKT_FRAGMENTED)
+			return TC_ACT_SHOT;
+		if (protocol == IPPROTO_ICMP) {
+			action = parse_icmp(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV4_PLUS_ICMP_HDR;
+		} else {
+			pckt.src = iph->saddr;
+			pckt.dst = iph->daddr;
+		}
+	}
+	protocol = pckt.proto;
+
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else {
+		return TC_ACT_SHOT;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.daddr.v6, pckt.dstv6, 16);
+	else
+		vip.daddr.v4 = pckt.dst;
+
+	vip.dport = pckt.port16[1];
+	vip.protocol = pckt.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.dport = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return TC_ACT_SHOT;
+		pckt.port16[1] = 0;
+	}
+
+	if (vip_info->flags & F_HASH_NO_SRC_PORT)
+		pckt.port16[0] = 0;
+
+	if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+		return TC_ACT_SHOT;
+
+	if (dst->flags & F_IPV6) {
+		cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+		tun_flag = BPF_F_TUNINFO_IPV6;
+	} else {
+		cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		tkey.remote_ipv4 = dst->dst;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return TC_ACT_SHOT;
+	data_stats->pkts++;
+	data_stats->bytes += pkt_bytes;
+	bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+	*(u32 *)eth->eth_dest = tkey.remote_ipv4;
+	return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return TC_ACT_SHOT;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == bpf_htons(ETH_P_IP))
+		return process_packet(data, nh_off, data_end, false, ctx);
+	else if (eth_proto == bpf_htons(ETH_P_IPV6))
+		return process_packet(data, nh_off, data_end, true, ctx);
+	else
+		return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1d7d2149163a..abff83bf8d40 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -169,10 +169,9 @@ out:
 #define NUM_ITER 100000
 #define VIP_NUM 5
 
-static void test_l4lb(void)
+static void test_l4lb(const char *file)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	const char *file = "./test_l4lb.o";
 	struct vip key = {.protocol = 6};
 	struct vip_meta {
 		__u32 flags;
@@ -249,6 +248,15 @@ out:
 	bpf_object__close(obj);
 }
 
+static void test_l4lb_all(void)
+{
+	const char *file1 = "./test_l4lb.o";
+	const char *file2 = "./test_l4lb_noinline.o";
+
+	test_l4lb(file1);
+	test_l4lb(file2);
+}
+
 static void test_tcp_estats(void)
 {
 	const char *file = "./test_tcp_estats.o";
@@ -757,7 +765,7 @@ int main(void)
 
 	test_pkt_access();
 	test_xdp();
-	test_l4lb();
+	test_l4lb_all();
 	test_tcp_estats();
 	test_bpf_obj_id();
 	test_pkt_md_access();
-- 
cgit v1.2.3


From b0b04fc49e3b97a6039b9b658798efdcda71478d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 14 Dec 2017 17:55:12 -0800
Subject: selftests/bpf: add xdp noinline test

add large semi-artificial XDP test with 18 functions to stress test
bpf call verification logic

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile            |   3 +-
 tools/testing/selftests/bpf/test_progs.c        |  81 +++
 tools/testing/selftests/bpf/test_xdp_noinline.c | 833 ++++++++++++++++++++++++
 3 files changed, 916 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_xdp_noinline.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6970d073df5b..7ef9601d04bf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -18,7 +18,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o
+	test_l4lb_noinline.o test_xdp_noinline.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
@@ -54,6 +54,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 	      -Wno-compare-distinct-pointer-types
 
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
 %.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index abff83bf8d40..6472ca98690e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -257,6 +257,86 @@ static void test_l4lb_all(void)
 	test_l4lb(file2);
 }
 
+static void test_xdp_noinline(void)
+{
+	const char *file = "./test_xdp_noinline.o";
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct vip key = {.protocol = 6};
+	struct vip_meta {
+		__u32 flags;
+		__u32 vip_num;
+	} value = {.vip_num = VIP_NUM};
+	__u32 stats_key = VIP_NUM;
+	struct vip_stats {
+		__u64 bytes;
+		__u64 pkts;
+	} stats[nr_cpus];
+	struct real_definition {
+		union {
+			__be32 dst;
+			__be32 dstv6[4];
+		};
+		__u8 flags;
+	} real_def = {.dst = MAGIC_VAL};
+	__u32 ch_key = 11, real_num = 3;
+	__u32 duration, retval, size;
+	int err, i, prog_fd, map_fd;
+	__u64 bytes = 0, pkts = 0;
+	struct bpf_object *obj;
+	char buf[128];
+	u32 *magic = (u32 *)buf;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "vip_map");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key, &value, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "reals");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 54 ||
+	      *magic != MAGIC_VAL, "ipv4",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 74 ||
+	      *magic != MAGIC_VAL, "ipv6",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	map_fd = bpf_find_map(__func__, obj, "stats");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+	for (i = 0; i < nr_cpus; i++) {
+		bytes += stats[i].bytes;
+		pkts += stats[i].pkts;
+	}
+	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+		error_cnt++;
+		printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+	}
+out:
+	bpf_object__close(obj);
+}
+
 static void test_tcp_estats(void)
 {
 	const char *file = "./test_tcp_estats.o";
@@ -766,6 +846,7 @@ int main(void)
 	test_pkt_access();
 	test_xdp();
 	test_l4lb_all();
+	test_xdp_noinline();
 	test_tcp_estats();
 	test_bpf_obj_id();
 	test_pkt_md_access();
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000000..5e4aac74f9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			##__VA_ARGS__);			\
+})
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static __attribute__ ((noinline))
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+struct flow_key {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+};
+
+struct packet_description {
+	struct flow_key flow;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_definition {
+	union {
+		__be32 vip;
+		__be32 vipv6[4];
+	};
+	__u16 port;
+	__u16 family;
+	__u8 proto;
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_pos_lru {
+	__u32 pos;
+	__u64 atime;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct lb_stats {
+	__u64 v2;
+	__u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip_definition),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = 512,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+	.type = BPF_MAP_TYPE_LRU_HASH,
+	.key_size = sizeof(struct flow_key),
+	.value_size = sizeof(struct real_pos_lru),
+	.max_entries = 300,
+	.map_flags = 1U << 1,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 12 * 655,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = 40,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct lb_stats),
+	.max_entries = 515,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = 16,
+	.map_flags = 0,
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[6];
+	unsigned char eth_source[6];
+	unsigned short eth_proto;
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+	__u64 off = sizeof(struct eth_hdr);
+	if (is_ipv6) {
+		off += sizeof(struct ipv6hdr);
+		if (is_icmp)
+			off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+	} else {
+		off += sizeof(struct iphdr);
+		if (is_icmp)
+			off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+	}
+	return off;
+}
+
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return 0;
+	if (!is_icmp) {
+		pckt->flow.port16[0] = udp->source;
+		pckt->flow.port16[1] = udp->dest;
+	} else {
+		pckt->flow.port16[0] = udp->dest;
+		pckt->flow.port16[1] = udp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return 0;
+	if (tcp->syn)
+		pckt->flags |= (1 << 1);
+	if (!is_icmp) {
+		pckt->flow.port16[0] = tcp->source;
+		pckt->flow.port16[1] = tcp->dest;
+	} else {
+		pckt->flow.port16[0] = tcp->dest;
+		pckt->flow.port16[1] = tcp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	struct ipv6hdr *ip6h;
+	__u32 ip_suffix;
+	void *data_end;
+	void *data;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return 0;
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	ip6h = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct ipv6hdr);
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || ip6h + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 56710;
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+	ip6h->payload_len =
+	    __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+	ip6h->hop_limit = 4;
+
+	ip6h->saddr.in6_u.u6_addr32[0] = 1;
+	ip6h->saddr.in6_u.u6_addr32[1] = 2;
+	ip6h->saddr.in6_u.u6_addr32[2] = 3;
+	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+
+	__u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	__u16 *next_iph_u16;
+	struct iphdr *iph;
+	__u32 csum = 0;
+	void *data_end;
+	void *data;
+
+	ip_suffix <<= 15;
+	ip_suffix ^= pckt->flow.src;
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return 0;
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	iph = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct iphdr);
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || iph + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8;
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->frag_off = 0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 1;
+	iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+	/* don't update iph->daddr, since it will overwrite old eth_proto
+	 * and multiple iterations of bpf_prog_run() will fail
+	 */
+
+	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+	iph->ttl = 4;
+
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+		return 0;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct ipv6hdr);
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	if (inner_v4)
+		new_eth->eth_proto = 8;
+	else
+		new_eth->eth_proto = 56710;
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+		return 0;
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct iphdr);
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8;
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+		return 0;
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+	unsigned char tmp_mac[6];
+	struct eth_hdr *eth;
+
+	eth = data;
+	memcpy(tmp_mac, eth->eth_source, 6);
+	memcpy(eth->eth_source, eth->eth_dest, 6);
+	memcpy(eth->eth_dest, tmp_mac, 6);
+	return XDP_TX;
+}
+
+static __attribute__ ((noinline))
+int send_icmp_reply(void *data, void *data_end)
+{
+	struct icmphdr *icmp_hdr;
+	__u16 *next_iph_u16;
+	__u32 tmp_addr = 0;
+	struct iphdr *iph;
+	__u32 csum1 = 0;
+	__u32 csum = 0;
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	iph = data + off;
+	off += sizeof(struct iphdr);
+	icmp_hdr = data + off;
+	icmp_hdr->type = 0;
+	icmp_hdr->checksum += 0x0007;
+	iph->ttl = 4;
+	tmp_addr = iph->daddr;
+	iph->daddr = iph->saddr;
+	iph->saddr = tmp_addr;
+	iph->check = 0;
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int send_icmp6_reply(void *data, void *data_end)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+	__be32 tmp_addr[4];
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	ip6h = data + off;
+	off += sizeof(struct ipv6hdr);
+	icmp_hdr = data + off;
+	icmp_hdr->icmp6_type = 129;
+	icmp_hdr->icmp6_cksum -= 0x0001;
+	ip6h->hop_limit = 4;
+	memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+	memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+	memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+		 struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->icmp6_type == 128)
+		return send_icmp6_reply(data, data_end);
+	if (icmp_hdr->icmp6_type != 3)
+		return XDP_PASS;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	pckt->flow.proto = ip6h->nexthdr;
+	pckt->flags |= (1 << 0);
+	memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
+	memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+	return -1;
+}
+
+static __attribute__ ((noinline))
+int parse_icmp(void *data, void *data_end, __u64 off,
+	       struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->type == 8)
+		return send_icmp_reply(data, data_end);
+	if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
+		return XDP_PASS;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5)
+		return XDP_DROP;
+	pckt->flow.proto = iph->protocol;
+	pckt->flags |= (1 << 0);
+	pckt->flow.src = iph->daddr;
+	pckt->flow.dst = iph->saddr;
+	return -1;
+}
+
+static __attribute__ ((noinline))
+__u32 get_packet_hash(struct packet_description *pckt,
+		      bool hash_16bytes)
+{
+	if (hash_16bytes)
+		return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+				    pckt->flow.ports, 24);
+	else
+		return jhash_2words(pckt->flow.src, pckt->flow.ports,
+				    24);
+}
+
+__attribute__ ((noinline))
+static bool get_packet_dst(struct real_definition **real,
+			   struct packet_description *pckt,
+			   struct vip_meta *vip_info,
+			   bool is_ipv6, void *lru_map)
+{
+	struct real_pos_lru new_dst_lru = { };
+	bool hash_16bytes = is_ipv6;
+	__u32 *real_pos, hash, key;
+	__u64 cur_time;
+
+	if (vip_info->flags & (1 << 2))
+		hash_16bytes = 1;
+	if (vip_info->flags & (1 << 3)) {
+		pckt->flow.port16[0] = pckt->flow.port16[1];
+		memset(pckt->flow.srcv6, 0, 16);
+	}
+	hash = get_packet_hash(pckt, hash_16bytes);
+	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
+	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return 0;
+	key = 2 * vip_info->vip_num + hash % 2;
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return 0;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return 0;
+	if (!(vip_info->flags & (1 << 1))) {
+		__u32 conn_rate_key = 512 + 2;
+		struct lb_stats *conn_rate_stats =
+		    bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+		if (!conn_rate_stats)
+			return 1;
+		cur_time = bpf_ktime_get_ns();
+		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+			conn_rate_stats->v1 = 1;
+			conn_rate_stats->v2 = cur_time;
+		} else {
+			conn_rate_stats->v1 += 1;
+			if (conn_rate_stats->v1 >= 1)
+				return 1;
+		}
+		if (pckt->flow.proto == IPPROTO_UDP)
+			new_dst_lru.atime = cur_time;
+		new_dst_lru.pos = key;
+		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
+	}
+	return 1;
+}
+
+__attribute__ ((noinline))
+static void connection_table_lookup(struct real_definition **real,
+				    struct packet_description *pckt,
+				    void *lru_map)
+{
+
+	struct real_pos_lru *dst_lru;
+	__u64 cur_time;
+	__u32 key;
+
+	dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+	if (!dst_lru)
+		return;
+	if (pckt->flow.proto == IPPROTO_UDP) {
+		cur_time = bpf_ktime_get_ns();
+		if (cur_time - dst_lru->atime > 300000)
+			return;
+		dst_lru->atime = cur_time;
+	}
+	key = dst_lru->pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline))
+static int process_l3_headers_v6(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct ipv6hdr *ip6h;
+	__u64 iph_len;
+	int action;
+
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	iph_len = sizeof(struct ipv6hdr);
+	*protocol = ip6h->nexthdr;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+	off += iph_len;
+	if (*protocol == 45) {
+		return XDP_DROP;
+	} else if (*protocol == 59) {
+		action = parse_icmpv6(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+		memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+	}
+	return -1;
+}
+
+__attribute__ ((noinline))
+static int process_l3_headers_v4(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct iphdr *iph;
+	__u64 iph_len;
+	int action;
+
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5)
+		return XDP_DROP;
+	*protocol = iph->protocol;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(iph->tot_len);
+	off += 20;
+	if (iph->frag_off & 65343)
+		return XDP_DROP;
+	if (*protocol == IPPROTO_ICMP) {
+		action = parse_icmp(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		pckt->flow.src = iph->saddr;
+		pckt->flow.dst = iph->daddr;
+	}
+	return -1;
+}
+
+__attribute__ ((noinline))
+static int process_packet(void *data, __u64 off, void *data_end,
+			  bool is_ipv6, struct xdp_md *xdp)
+{
+
+	struct real_definition *dst = NULL;
+	struct packet_description pckt = { };
+	struct vip_definition vip = { };
+	struct lb_stats *data_stats;
+	struct eth_hdr *eth = data;
+	void *lru_map = &lru_cache;
+	struct vip_meta *vip_info;
+	__u32 lru_stats_key = 513;
+	__u32 mac_addr_pos = 0;
+	__u32 stats_key = 512;
+	struct ctl_value *cval;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	if (is_ipv6)
+		action = process_l3_headers_v6(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	else
+		action = process_l3_headers_v4(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	if (action >= 0)
+		return action;
+	protocol = pckt.flow.proto;
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else {
+		return XDP_TX;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+	else
+		vip.vip = pckt.flow.dst;
+	vip.port = pckt.flow.port16[1];
+	vip.proto = pckt.flow.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.port = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return XDP_PASS;
+		if (!(vip_info->flags & (1 << 4)))
+			pckt.flow.port16[1] = 0;
+	}
+	if (data_end - data > 1400)
+		return XDP_DROP;
+	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	if (!dst) {
+		if (vip_info->flags & (1 << 0))
+			pckt.flow.port16[0] = 0;
+		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+			connection_table_lookup(&dst, &pckt, lru_map);
+		if (dst)
+			goto out;
+		if (pckt.flow.proto == IPPROTO_TCP) {
+			struct lb_stats *lru_stats =
+			    bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+			if (!lru_stats)
+				return XDP_DROP;
+			if (pckt.flags & (1 << 1))
+				lru_stats->v1 += 1;
+			else
+				lru_stats->v2 += 1;
+		}
+		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+			return XDP_DROP;
+		data_stats->v2 += 1;
+	}
+out:
+	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+	if (!cval)
+		return XDP_DROP;
+	if (dst->flags & (1 << 0)) {
+		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	} else {
+		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	data_stats->v2 += pkt_bytes;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	if (data + 4 > data_end)
+		return XDP_DROP;
+	*(u32 *)data = dst->dst;
+	return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used))
+int balancer_ingress(struct xdp_md *ctx)
+{
+	void *data = (void *)(long)ctx->data;
+	void *data_end = (void *)(long)ctx->data_end;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == 8)
+		return process_packet(data, nh_off, data_end, 0, ctx);
+	else if (eth_proto == 56710)
+		return process_packet(data, nh_off, data_end, 1, ctx);
+	else
+		return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL";
+int _version __attribute__ ((section("version"), used)) = 1;
-- 
cgit v1.2.3


From 28ab173e96b3971842414bf88eb02eca6ea3f018 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 14 Dec 2017 17:55:17 -0800
Subject: selftests/bpf: additional bpf_call tests

Add some additional checks for few more corner cases.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 597 ++++++++++++++++++++++++++++
 1 file changed, 597 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index eaf294822a8f..3bacff0d6f91 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8110,6 +8110,180 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 		.result = ACCEPT,
 	},
+	{
+		"calls: not on unpriviledged",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: overlapping caller/callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn is not an exit or jmp",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong recursive calls",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong src reg",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "BPF_CALL uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong off value",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "BPF_CALL uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"calls: jump back loop",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn 0 to 0",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: conditional call 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: conditional call 5",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 6",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
 	{
 		"calls: using r0 returned by callee",
 		.insns = {
@@ -8121,6 +8295,17 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 		.result = ACCEPT,
 	},
+	{
+		"calls: using uninit r0 from callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "!read_ok",
+		.result = REJECT,
+	},
 	{
 		"calls: callee is using r1",
 		.insns = {
@@ -8223,6 +8408,71 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
 	},
+	{
+		"calls: calls with stack arith",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: calls with misaligned stack access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+		.errstr = "misaligned stack access",
+		.result = REJECT,
+	},
+	{
+		"calls: calls control flow, jump test",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 43),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: calls control flow, jump test 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 43),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "jump out of range from insn 1 to 4",
+		.result = REJECT,
+	},
 	{
 		"calls: two calls with bad jump",
 		.insns = {
@@ -8297,6 +8547,18 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid destination",
 		.result = REJECT,
 	},
+	{
+		"calls: invalid call 2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "invalid destination",
+		.result = REJECT,
+	},
 	{
 		"calls: jumping across function bodies. test1",
 		.insns = {
@@ -8366,6 +8628,30 @@ static struct bpf_test tests[] = {
 		.errstr = "last insn",
 		.result = REJECT,
 	},
+	{
+		"calls: ld_abs with changing ctx data in callee",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+		.result = REJECT,
+	},
 	{
 		"calls: two calls with bad fallthrough",
 		.insns = {
@@ -8459,6 +8745,36 @@ static struct bpf_test tests[] = {
 		.errstr = "cannot spill",
 		.result = REJECT,
 	},
+	{
+		"calls: write into caller stack frame",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: write into callee stack frame",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "cannot return stack pointer",
+		.result = REJECT,
+	},
 	{
 		"calls: two calls with stack write and void return",
 		.insns = {
@@ -9056,6 +9372,287 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"calls: pkt_ptr spill into caller stack 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			/* Marking is still kept, but not in all cases safe. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* Marking is still kept and safe here. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* Check marking propagated. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "same insn cannot be used with different",
+		.result = REJECT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 7",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 8",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 9",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+	},
 	{
 		"calls: caller stack init to zero or map_value_or_null",
 		.insns = {
-- 
cgit v1.2.3


From 21567eded9805acbf69807671eb94d3536f797d0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 15 Dec 2017 16:19:30 -0800
Subject: libbpf: fix Makefile exit code if libelf not found

/bin/sh's exit does not recognize -1 as a number, leading to
the following error message:

/bin/sh: 1: exit: Illegal number: -1

Use 1 as the exit code.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4555304dc18e..8ed43ae9db9b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -213,10 +213,10 @@ PHONY += force elfdep bpfdep
 force:
 
 elfdep:
-	@if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi
+	@if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
 
 bpfdep:
-	@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi
+	@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
 
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable so we can use it in if_changed and friends.
-- 
cgit v1.2.3


From 4ca998fe46b1fce4988005851df2c85b7bf2addf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 18 Dec 2017 15:11:30 -0800
Subject: selftests/bpf: add netdevsim to config

BPF offload tests (test_offload.py) will require netdevsim
to be built, add it to config.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 9d4897317c77..983dd25d49f4 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -4,3 +4,4 @@ CONFIG_NET_CLS_BPF=m
 CONFIG_BPF_EVENTS=y
 CONFIG_TEST_BPF=m
 CONFIG_CGROUP_BPF=y
+CONFIG_NETDEVSIM=m
-- 
cgit v1.2.3


From 7d9890ef505a8c2a778d304535e26e827d58c466 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 19 Dec 2017 15:53:11 -0500
Subject: libbpf: Fix build errors.

These elf object pieces are of type Elf64_Xword and therefore could be
"long long" on some builds.

Cast to "long long" and use printf format %lld to deal with this since
we are building with -Werror=format.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5b83875b3594..e9c4b7cabcf2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -910,8 +910,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
 		}
-		pr_debug("relo for %ld value %ld name %d\n",
-			 rel.r_info >> 32, sym.st_value, sym.st_name);
+		pr_debug("relo for %lld value %lld name %d\n",
+			 (long long) (rel.r_info >> 32),
+			 (long long) sym.st_value, sym.st_name);
 
 		if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
 			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
-- 
cgit v1.2.3


From 92e70b83c0fef79dea9b8be06944b1eaffa4a9ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 14 Dec 2017 21:26:08 +0100
Subject: tools/gpio: Don't use u_int32_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

u_int32_t is a non-standard version of uint32_t, that was apparently
introduced by BSD. Use uint32_t from stdint.h instead.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 tools/gpio/gpio-event-mon.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1c14c2595158..be6768e21b09 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -14,6 +14,7 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <dirent.h>
 #include <errno.h>
@@ -27,8 +28,8 @@
 
 int monitor_device(const char *device_name,
 		   unsigned int line,
-		   u_int32_t handleflags,
-		   u_int32_t eventflags,
+		   uint32_t handleflags,
+		   uint32_t eventflags,
 		   unsigned int loops)
 {
 	struct gpioevent_request req;
@@ -145,8 +146,8 @@ int main(int argc, char **argv)
 	const char *device_name = NULL;
 	unsigned int line = -1;
 	unsigned int loops = 0;
-	u_int32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
-	u_int32_t eventflags = 0;
+	uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
+	uint32_t eventflags = 0;
 	int c;
 
 	while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
-- 
cgit v1.2.3


From 5d0c138eff18651549e5501aaca8f3726cc9a6b5 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 19 Dec 2017 10:37:02 -0800
Subject: selftests: rtnetlink: add gretap test cases

Add test cases for gretap and ip6gretap, native mode
and external (collect metadata) mode.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 98 ++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 5215493166c9..dada4ab69142 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -502,6 +502,102 @@ kci_test_macsec()
 	echo "PASS: macsec"
 }
 
+kci_test_gretap()
+{
+	testns="testns"
+	DEV_NS=gretap00
+	ret=0
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP gretap tests: cannot add net namespace $testns"
+		return 1
+	fi
+
+	ip link help gretap 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: gretap: iproute2 too old"
+		return 1
+	fi
+
+	# test native tunnel
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: gretap"
+		return 1
+	fi
+	echo "PASS: gretap"
+
+	ip netns del "$testns"
+}
+
+kci_test_ip6gretap()
+{
+	testns="testns"
+	DEV_NS=ip6gretap00
+	ret=0
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+		return 1
+	fi
+
+	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: ip6gretap: iproute2 too old"
+		return 1
+	fi
+
+	# test native tunnel
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \
+		key 102 local fc00:100::1 remote fc00:100::2
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ip6gretap"
+		return 1
+	fi
+	echo "PASS: ip6gretap"
+
+	ip netns del "$testns"
+}
+
 kci_test_rtnl()
 {
 	kci_add_dummy
@@ -514,6 +610,8 @@ kci_test_rtnl()
 	kci_test_route_get
 	kci_test_tc
 	kci_test_gre
+	kci_test_gretap
+	kci_test_ip6gretap
 	kci_test_bridge
 	kci_test_addrlabel
 	kci_test_ifalias
-- 
cgit v1.2.3


From 7105e828c087de970fcb5a9509db51bfe6bd7894 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 20 Dec 2017 13:42:57 +0100
Subject: bpf: allow for correlation of maps and helpers in dump

Currently a dump of an xlated prog (post verifier stage) doesn't
correlate used helpers as well as maps. The prog info lists
involved map ids, however there's no correlation of where in the
program they are used as of today. Likewise, bpftool does not
correlate helper calls with the target functions.

The latter can be done w/o any kernel changes through kallsyms,
and also has the advantage that this works with inlined helpers
and BPF calls.

Example, via interpreter:

  # tc filter show dev foo ingress
  filter protocol all pref 49152 bpf chain 0
  filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \
                      direct-action not_in_hw id 1 tag c74773051b364165   <-- prog id:1

  * Output before patch (calls/maps remain unclear):

  # bpftool prog dump xlated id 1             <-- dump prog id:1
   0: (b7) r1 = 2
   1: (63) *(u32 *)(r10 -4) = r1
   2: (bf) r2 = r10
   3: (07) r2 += -4
   4: (18) r1 = 0xffff95c47a8d4800
   6: (85) call unknown#73040
   7: (15) if r0 == 0x0 goto pc+18
   8: (bf) r2 = r10
   9: (07) r2 += -4
  10: (bf) r1 = r0
  11: (85) call unknown#73040
  12: (15) if r0 == 0x0 goto pc+23
  [...]

  * Output after patch:

  # bpftool prog dump xlated id 1
   0: (b7) r1 = 2
   1: (63) *(u32 *)(r10 -4) = r1
   2: (bf) r2 = r10
   3: (07) r2 += -4
   4: (18) r1 = map[id:2]                     <-- map id:2
   6: (85) call bpf_map_lookup_elem#73424     <-- helper call
   7: (15) if r0 == 0x0 goto pc+18
   8: (bf) r2 = r10
   9: (07) r2 += -4
  10: (bf) r1 = r0
  11: (85) call bpf_map_lookup_elem#73424
  12: (15) if r0 == 0x0 goto pc+23
  [...]

  # bpftool map show id 2                     <-- show/dump/etc map id:2
  2: hash_of_maps  flags 0x0
        key 4B  value 4B  max_entries 3  memlock 4096B

Example, JITed, same prog:

  # tc filter show dev foo ingress
  filter protocol all pref 49152 bpf chain 0
  filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \
                  direct-action not_in_hw id 3 tag c74773051b364165 jited

  # bpftool prog show id 3
  3: sched_cls  tag c74773051b364165
        loaded_at Dec 19/13:48  uid 0
        xlated 384B  jited 257B  memlock 4096B  map_ids 2

  # bpftool prog dump xlated id 3
   0: (b7) r1 = 2
   1: (63) *(u32 *)(r10 -4) = r1
   2: (bf) r2 = r10
   3: (07) r2 += -4
   4: (18) r1 = map[id:2]                      <-- map id:2
   6: (85) call __htab_map_lookup_elem#77408   <-+ inlined rewrite
   7: (15) if r0 == 0x0 goto pc+2                |
   8: (07) r0 += 56                              |
   9: (79) r0 = *(u64 *)(r0 +0)                <-+
  10: (15) if r0 == 0x0 goto pc+24
  11: (bf) r2 = r10
  12: (07) r2 += -4
  [...]

Example, same prog, but kallsyms disabled (in that case we are
also not allowed to pass any relative offsets, etc, so prog
becomes pointer sanitized on dump):

  # sysctl kernel.kptr_restrict=2
  kernel.kptr_restrict = 2

  # bpftool prog dump xlated id 3
   0: (b7) r1 = 2
   1: (63) *(u32 *)(r10 -4) = r1
   2: (bf) r2 = r10
   3: (07) r2 += -4
   4: (18) r1 = map[id:2]
   6: (85) call bpf_unspec#0
   7: (15) if r0 == 0x0 goto pc+2
  [...]

Example, BPF calls via interpreter:

  # bpftool prog dump xlated id 1
   0: (85) call pc+2#__bpf_prog_run_args32
   1: (b7) r0 = 1
   2: (95) exit
   3: (b7) r0 = 2
   4: (95) exit

Example, BPF calls via JIT:

  # sysctl net.core.bpf_jit_enable=1
  net.core.bpf_jit_enable = 1
  # sysctl net.core.bpf_jit_kallsyms=1
  net.core.bpf_jit_kallsyms = 1

  # bpftool prog dump xlated id 1
   0: (85) call pc+2#bpf_prog_3b185187f1855c4c_F
   1: (b7) r0 = 1
   2: (95) exit
   3: (b7) r0 = 2
   4: (95) exit

And finally, an example for tail calls that is now working
as well wrt correlation:

  # bpftool prog dump xlated id 2
  [...]
  10: (b7) r2 = 8
  11: (85) call bpf_trace_printk#-41312
  12: (bf) r1 = r6
  13: (18) r2 = map[id:1]
  15: (b7) r3 = 0
  16: (85) call bpf_tail_call#12
  17: (b7) r1 = 42
  18: (6b) *(u16 *)(r6 +46) = r1
  19: (b7) r0 = 0
  20: (95) exit

  # bpftool map show id 1
  1: prog_array  flags 0x0
        key 4B  value 4B  max_entries 1  memlock 4096B

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/prog.c | 181 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 172 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 037484ceaeaf..42ee8892549c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -401,6 +401,88 @@ static int do_show(int argc, char **argv)
 	return err;
 }
 
+#define SYM_MAX_NAME	256
+
+struct kernel_sym {
+	unsigned long address;
+	char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+	unsigned long address_call_base;
+	struct kernel_sym *sym_mapping;
+	__u32 sym_count;
+	char scratch_buff[SYM_MAX_NAME];
+};
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+	return ((struct kernel_sym *)sym_a)->address -
+	       ((struct kernel_sym *)sym_b)->address;
+}
+
+static void kernel_syms_load(struct dump_data *dd)
+{
+	struct kernel_sym *sym;
+	char buff[256];
+	void *tmp, *address;
+	FILE *fp;
+
+	fp = fopen("/proc/kallsyms", "r");
+	if (!fp)
+		return;
+
+	while (!feof(fp)) {
+		if (!fgets(buff, sizeof(buff), fp))
+			break;
+		tmp = realloc(dd->sym_mapping,
+			      (dd->sym_count + 1) *
+			      sizeof(*dd->sym_mapping));
+		if (!tmp) {
+out:
+			free(dd->sym_mapping);
+			dd->sym_mapping = NULL;
+			fclose(fp);
+			return;
+		}
+		dd->sym_mapping = tmp;
+		sym = &dd->sym_mapping[dd->sym_count];
+		if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+			continue;
+		sym->address = (unsigned long)address;
+		if (!strcmp(sym->name, "__bpf_call_base")) {
+			dd->address_call_base = sym->address;
+			/* sysctl kernel.kptr_restrict was set */
+			if (!sym->address)
+				goto out;
+		}
+		if (sym->address)
+			dd->sym_count++;
+	}
+
+	fclose(fp);
+
+	qsort(dd->sym_mapping, dd->sym_count,
+	      sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+static void kernel_syms_destroy(struct dump_data *dd)
+{
+	free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+					     unsigned long key)
+{
+	struct kernel_sym sym = {
+		.address = key,
+	};
+
+	return dd->sym_mapping ?
+	       bsearch(&sym, dd->sym_mapping, dd->sym_count,
+		       sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
 static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
 {
 	va_list args;
@@ -410,8 +492,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
 	va_end(args);
 }
 
-static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes)
+static const char *print_call_pcrel(struct dump_data *dd,
+				    struct kernel_sym *sym,
+				    unsigned long address,
+				    const struct bpf_insn *insn)
 {
+	if (sym)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%+d#%s", insn->off, sym->name);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%+d#0x%lx", insn->off, address);
+	return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+				     struct kernel_sym *sym,
+				     unsigned long address)
+{
+	if (sym)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%s", sym->name);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "0x%lx", address);
+	return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+			      const struct bpf_insn *insn)
+{
+	struct dump_data *dd = private_data;
+	unsigned long address = dd->address_call_base + insn->imm;
+	struct kernel_sym *sym;
+
+	sym = kernel_syms_search(dd, address);
+	if (insn->src_reg == BPF_PSEUDO_CALL)
+		return print_call_pcrel(dd, sym, address, insn);
+	else
+		return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+			     const struct bpf_insn *insn,
+			     __u64 full_imm)
+{
+	struct dump_data *dd = private_data;
+
+	if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "map[id:%u]", insn->imm);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "0x%llx", (unsigned long long)full_imm);
+	return dd->scratch_buff;
+}
+
+static void dump_xlated_plain(struct dump_data *dd, void *buf,
+			      unsigned int len, bool opcodes)
+{
+	const struct bpf_insn_cbs cbs = {
+		.cb_print	= print_insn,
+		.cb_call	= print_call,
+		.cb_imm		= print_imm,
+		.private_data	= dd,
+	};
 	struct bpf_insn *insn = buf;
 	bool double_insn = false;
 	unsigned int i;
@@ -425,7 +570,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes)
 		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
 
 		printf("% 4d: ", i);
-		print_bpf_insn(print_insn, NULL, insn + i, true);
+		print_bpf_insn(&cbs, NULL, insn + i, true);
 
 		if (opcodes) {
 			printf("       ");
@@ -454,8 +599,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
 	va_end(args);
 }
 
-static void dump_xlated_json(void *buf, unsigned int len, bool opcodes)
+static void dump_xlated_json(struct dump_data *dd, void *buf,
+			     unsigned int len, bool opcodes)
 {
+	const struct bpf_insn_cbs cbs = {
+		.cb_print	= print_insn_json,
+		.cb_call	= print_call,
+		.cb_imm		= print_imm,
+		.private_data	= dd,
+	};
 	struct bpf_insn *insn = buf;
 	bool double_insn = false;
 	unsigned int i;
@@ -470,7 +622,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes)
 
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "disasm");
-		print_bpf_insn(print_insn_json, NULL, insn + i, true);
+		print_bpf_insn(&cbs, NULL, insn + i, true);
 
 		if (opcodes) {
 			jsonw_name(json_wtr, "opcodes");
@@ -505,6 +657,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes)
 static int do_dump(int argc, char **argv)
 {
 	struct bpf_prog_info info = {};
+	struct dump_data dd = {};
 	__u32 len = sizeof(info);
 	unsigned int buf_size;
 	char *filepath = NULL;
@@ -592,6 +745,14 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if ((member_len == &info.jited_prog_len &&
+	     info.jited_prog_insns == 0) ||
+	    (member_len == &info.xlated_prog_len &&
+	     info.xlated_prog_insns == 0)) {
+		p_err("error retrieving insn dump: kernel.kptr_restrict set?");
+		goto err_free;
+	}
+
 	if (filepath) {
 		fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 		if (fd < 0) {
@@ -608,17 +769,19 @@ static int do_dump(int argc, char **argv)
 			goto err_free;
 		}
 	} else {
-		if (member_len == &info.jited_prog_len)
+		if (member_len == &info.jited_prog_len) {
 			disasm_print_insn(buf, *member_len, opcodes);
-		else
+		} else {
+			kernel_syms_load(&dd);
 			if (json_output)
-				dump_xlated_json(buf, *member_len, opcodes);
+				dump_xlated_json(&dd, buf, *member_len, opcodes);
 			else
-				dump_xlated_plain(buf, *member_len, opcodes);
+				dump_xlated_plain(&dd, buf, *member_len, opcodes);
+			kernel_syms_destroy(&dd);
+		}
 	}
 
 	free(buf);
-
 	return 0;
 
 err_free:
-- 
cgit v1.2.3


From c475ffad58a8a2f1d3a2bd433eaa491471981b49 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 20 Dec 2017 10:37:08 -0800
Subject: tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_dev_cgroup

The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases,
e.g. in a test machine mimicking our production system, this test may
fail due to unable to charge the required memory for prog load:

  $ ./test_dev_cgroup
  libbpf: load bpf program failed: Operation not permitted
  libbpf: failed to load program 'cgroup/dev'
  libbpf: failed to load object './dev_cgroup.o'
  Failed to load DEV_CGROUP program
  ...

Changing the default rlimit RLIMIT_MEMLOCK to unlimited
makes the test pass.

This patch also fixed a problem where when bpf_prog_load fails,
cleanup_cgroup_environment() should not be called since
setup_cgroup_environment() has not been invoked. Otherwise,
the following confusing message will appear:
  ...
  (/home/yhs/local/linux/tools/testing/selftests/bpf/cgroup_helpers.c:95:
   errno: No such file or directory) Opening Cgroup Procs: /mnt/cgroup.procs
  ...

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_dev_cgroup.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 02c85d6c89b0..c1535b34f14f 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -10,6 +10,8 @@
 #include <string.h>
 #include <errno.h>
 #include <assert.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
@@ -23,15 +25,19 @@
 
 int main(int argc, char **argv)
 {
+	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	struct bpf_object *obj;
 	int error = EXIT_FAILURE;
 	int prog_fd, cgroup_fd;
 	__u32 prog_cnt;
 
+	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+		perror("Unable to lift memlock rlimit");
+
 	if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
 			  &obj, &prog_fd)) {
 		printf("Failed to load DEV_CGROUP program\n");
-		goto err;
+		goto out;
 	}
 
 	if (setup_cgroup_environment()) {
@@ -89,5 +95,6 @@ int main(int argc, char **argv)
 err:
 	cleanup_cgroup_environment();
 
+out:
 	return error;
 }
-- 
cgit v1.2.3


From 1696784eb7b52b13b62d160c028ef2c2c981d4f2 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Thu, 21 Dec 2017 11:11:31 +1030
Subject: tools/gpio: Fix build error with musl libc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GPIO tools build fails when using a buildroot toolchain that uses musl
as it's C library:

arm-broomstick-linux-musleabi-gcc -Wp,-MD,./.gpio-event-mon.o.d \
 -Wp,-MT,gpio-event-mon.o -O2 -Wall -g -D_GNU_SOURCE \
 -Iinclude -D"BUILD_STR(s)=#s" -c -o gpio-event-mon.o gpio-event-mon.c
gpio-event-mon.c:30:6: error: unknown type name ‘u_int32_t’; did you mean ‘uint32_t’?
      u_int32_t handleflags,
      ^~~~~~~~~
      uint32_t

The glibc headers installed on my laptop include sys/types.h in
unistd.h, but it appears that musl does not.

Fixes: 97f69747d8b1 ("tools/gpio: add the gpio-event-mon tool")
Cc: stable@vger.kernel.org
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 tools/gpio/gpio-event-mon.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index be6768e21b09..dac4d4131d9b 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -24,6 +24,7 @@
 #include <getopt.h>
 #include <inttypes.h>
 #include <sys/ioctl.h>
+#include <sys/types.h>
 #include <linux/gpio.h>
 
 int monitor_device(const char *device_name,
-- 
cgit v1.2.3


From fd05e57bb35ad5eb7e261b64e5cf46445250f842 Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Sat, 23 Dec 2017 10:09:55 +0000
Subject: bpf: fix stacksafe exploration when comparing states

Commit cc2b14d51053 ("bpf: teach verifier to recognize zero initialized
stack") introduced a very relaxed check when comparing stacks of different
states, effectively returning a positive result in many cases where it
shouldn't.

This can create problems in cases such as this following C pseudocode:

long var;
long *x = bpf_map_lookup(...);
if (!x)
        return;

if (*x != 0xbeef)
        var = 0;
else
        var = 1;

/* This is the key part, calling a helper causes an explored state
 * to be saved with the information that "var" is on the stack as
 * STACK_ZERO, since the helper is first met by the verifier after
 * the "var = 0" assignment. This state will however be wrongly used
 * also for the "var = 1" case, so the verifier assumes "var" is always
 * 0 and will replace the NULL assignment with nops, because the
 * search pruning prevents it from exploring the faulty branch.
 */
bpf_ktime_get_ns();

if (var)
        *(long *)0 = 0xbeef;

Fix the issue by making sure that the stack is fully explored before
returning a positive comparison result.

Also attach a couple tests that highlight the bad behavior. In the first
test, without this fix instructions 16 and 17 are replaced with nops
instead of being rejected by the verifier.

The second test, instead, allows a program to make a potentially illegal
read from the stack.

Fixes: cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack")
Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 51 +++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3bacff0d6f91..5e79515d10c5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -9715,6 +9715,57 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
+	{
+		"search pruning: all branches should be verified (nop operation)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_A(1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+			BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R6 invalid mem access 'inv'",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"search pruning: all branches should be verified (invalid stack access)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+			BPF_JMP_A(1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
+			BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "invalid read from stack off -16+0 size 8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 6b80ad299208b44ba33cb6df80bdaa3f63cf03e2 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 22 Dec 2017 19:12:35 +0100
Subject: bpf: selftest for late caller stack size increase

This checks that it is not possible to bypass the total stack size check in
update_stack_depth() by calling a function that uses a large amount of
stack memory *before* using a large amount of stack memory in the caller.

Currently, the first added testcase causes a rejection as expected, but
the second testcase is (AFAICS incorrectly) accepted:

[...]
#483/p calls: stack overflow using two frames (post-call access) FAIL
Unexpected success to load!
0: (85) call pc+2
caller:
 R10=fp0,call_-1
callee:
 frame1: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0
3: (72) *(u8 *)(r10 -300) = 0
4: (b7) r0 = 0
5: (95) exit
returning from callee:
 frame1: R0_w=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0
to caller at 1:
 R0_w=inv0 R10=fp0,call_-1

from 5 to 1: R0=inv0 R10=fp0,call_-1
1: (72) *(u8 *)(r10 -300) = 0
2: (95) exit
processed 6 insns, stack depth 300+300
[...]
Summary: 704 PASSED, 1 FAILED

AFAICS the JIT-generated code for the second testcase shows that this
really causes the stack pointer to be decremented by 300+300:

first function:
00000000  55                push rbp
00000001  4889E5            mov rbp,rsp
00000004  4881EC58010000    sub rsp,0x158
0000000B  4883ED28          sub rbp,byte +0x28
[...]
00000025  E89AB3AFE5        call 0xffffffffe5afb3c4
0000002A  C685D4FEFFFF00    mov byte [rbp-0x12c],0x0
[...]
00000041  4883C528          add rbp,byte +0x28
00000045  C9                leave
00000046  C3                ret

second function:
00000000  55                push rbp
00000001  4889E5            mov rbp,rsp
00000004  4881EC58010000    sub rsp,0x158
0000000B  4883ED28          sub rbp,byte +0x28
[...]
00000025  C685D4FEFFFF00    mov byte [rbp-0x12c],0x0
[...]
0000003E  4883C528          add rbp,byte +0x28
00000042  C9                leave
00000043  C3                ret

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 34 +++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 5e79515d10c5..41dcc7dbba42 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8729,6 +8729,40 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_XDP,
 		.result = ACCEPT,
 	},
+	{
+		"calls: stack overflow using two frames (pre-call access)",
+		.insns = {
+			/* prog 1 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* prog 2 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "combined stack size",
+		.result = REJECT,
+	},
+	{
+		"calls: stack overflow using two frames (post-call access)",
+		.insns = {
+			/* prog 1 */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+
+			/* prog 2 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "combined stack size",
+		.result = REJECT,
+	},
 	{
 		"calls: spill into caller stack frame",
 		.insns = {
-- 
cgit v1.2.3


From 6b86c4217c231cbd268bd8c6fda025b27047d3ed Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 25 Dec 2017 13:15:41 -0800
Subject: selftests/bpf: additional stack depth tests

to test inner logic of stack depth tracking

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 121 ++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 41dcc7dbba42..b5a7a6c530dc 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8763,6 +8763,127 @@ static struct bpf_test tests[] = {
 		.errstr = "combined stack size",
 		.result = REJECT,
 	},
+	{
+		"calls: stack depth check using three frames. test1",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=32, stack_A=256, stack_B=64
+		 * and max(main+A, main+A+B) < 512
+		 */
+		.result = ACCEPT,
+	},
+	{
+		"calls: stack depth check using three frames. test2",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=32, stack_A=64, stack_B=256
+		 * and max(main+A, main+A+B) < 512
+		 */
+		.result = ACCEPT,
+	},
+	{
+		"calls: stack depth check using three frames. test3",
+		.insns = {
+			/* main */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+			/* B */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=64, stack_A=224, stack_B=256
+		 * and max(main+A, main+A+B) > 512
+		 */
+		.errstr = "combined stack",
+		.result = REJECT,
+	},
+	{
+		"calls: stack depth check using three frames. test4",
+		/* void main(void) {
+		 *   func1(0);
+		 *   func1(1);
+		 *   func2(1);
+		 * }
+		 * void func1(int alloc_or_recurse) {
+		 *   if (alloc_or_recurse) {
+		 *     frame_pointer[-300] = 1;
+		 *   } else {
+		 *     func2(alloc_or_recurse);
+		 *   }
+		 * }
+		 * void func2(int alloc_or_recurse) {
+		 *   if (alloc_or_recurse) {
+		 *     frame_pointer[-300] = 1;
+		 *   }
+		 * }
+		 */
+		.insns = {
+			/* main */
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = REJECT,
+		.errstr = "combined stack",
+	},
 	{
 		"calls: spill into caller stack frame",
 		.insns = {
-- 
cgit v1.2.3


From aada9ce644e53410954daa6beb1f7c4ca158abd7 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 25 Dec 2017 13:15:42 -0800
Subject: bpf: fix max call depth check

fix off by one error in max call depth check
and add a test

Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 35 +++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index b5a7a6c530dc..5d0a574ce270 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8884,6 +8884,41 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.errstr = "combined stack",
 	},
+	{
+		"calls: stack depth check using three frames. test5",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */
+			BPF_EXIT_INSN(),
+			/* C */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */
+			BPF_EXIT_INSN(),
+			/* D */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */
+			BPF_EXIT_INSN(),
+			/* E */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */
+			BPF_EXIT_INSN(),
+			/* F */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */
+			BPF_EXIT_INSN(),
+			/* G */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */
+			BPF_EXIT_INSN(),
+			/* H */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "call stack",
+		.result = REJECT,
+	},
 	{
 		"calls: spill into caller stack frame",
 		.insns = {
-- 
cgit v1.2.3


From 4bfe3bd3cc351efd1d51b3258b060e9445533888 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 27 Dec 2017 19:16:28 +0000
Subject: tools/bpftool: use version from the kernel source tree

Bpftool determines it's own version based on the kernel
version, which is picked from the linux/version.h header.

It's strange to use the version of the installed kernel
headers, and makes much more sense to use the version
of the actual source tree, where bpftool sources are.

Fix this by building kernelversion target and use
the resulting string as bpftool version.

Example:
before:

$ bpftool version
bpftool v4.14.6

after:
$ bpftool version
bpftool v4.15.0-rc3

$bpftool version --json
{"version":"4.15.0-rc3"}

Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Makefile |  3 +++
 tools/bpf/bpftool/main.c   | 13 ++-----------
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 3f17ad317512..f8f31a8d9269 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -23,6 +23,8 @@ endif
 
 LIBBPF = $(BPF_PATH)libbpf.a
 
+BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+
 $(LIBBPF): FORCE
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
@@ -38,6 +40,7 @@ CC = gcc
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
 INSTALL ?= install
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index ecd53ccf1239..3a0396d87c42 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -38,7 +38,6 @@
 #include <errno.h>
 #include <getopt.h>
 #include <linux/bpf.h>
-#include <linux/version.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -95,21 +94,13 @@ static int do_help(int argc, char **argv)
 
 static int do_version(int argc, char **argv)
 {
-	unsigned int version[3];
-
-	version[0] = LINUX_VERSION_CODE >> 16;
-	version[1] = LINUX_VERSION_CODE >> 8 & 0xf;
-	version[2] = LINUX_VERSION_CODE & 0xf;
-
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "version");
-		jsonw_printf(json_wtr, "\"%u.%u.%u\"",
-			     version[0], version[1], version[2]);
+		jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
 		jsonw_end_object(json_wtr);
 	} else {
-		printf("%s v%u.%u.%u\n", bin_name,
-		       version[0], version[1], version[2]);
+		printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From fb982666e380c1632a74495b68b3c33a66e76430 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 27 Dec 2017 19:16:29 +0000
Subject: tools/bpftool: fix bpftool build with bintutils >= 2.9

Bpftool build is broken with binutils version 2.29 and later.
The cause is commit 003ca0fd2286 ("Refactor disassembler selection")
in the binutils repo, which changed the disassembler() function
signature.

Fix this by adding a new "feature" to the tools/build/features
infrastructure and make it responsible for decision which
disassembler() function signature to use.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile                                | 29 +++++++++++++++++++++++
 tools/bpf/bpf_jit_disasm.c                        |  7 ++++++
 tools/bpf/bpftool/Makefile                        | 24 +++++++++++++++++++
 tools/bpf/bpftool/jit_disasm.c                    |  7 ++++++
 tools/build/feature/Makefile                      |  4 ++++
 tools/build/feature/test-disassembler-four-args.c | 15 ++++++++++++
 6 files changed, 86 insertions(+)
 create mode 100644 tools/build/feature/test-disassembler-four-args.c

(limited to 'tools')

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 07a6697466ef..c8ec0ae16bf0 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -9,6 +9,35 @@ MAKE = make
 CFLAGS += -Wall -O2
 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
 
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+FEATURE_USER = .bpf
+FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+  check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
 %.yacc.c: %.y
 	$(YACC) -o $@ -d $<
 
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 75bf526a0168..30044bc4f389 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
 
 	disassemble_init_for_target(&info);
 
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+	disassemble = disassembler(info.arch,
+				   bfd_big_endian(bfdf),
+				   info.mach,
+				   bfdf);
+#else
 	disassemble = disassembler(bfdf);
+#endif
 	assert(disassemble);
 
 	do {
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f8f31a8d9269..2237bc43f71c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -46,6 +46,30 @@ LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 INSTALL ?= install
 RM ?= rm -f
 
+FEATURE_USER = .bpftool
+FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+  check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
 include $(wildcard *.d)
 
 all: $(OUTPUT)bpftool
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 1551d3918d4c..57d32e8a1391 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -107,7 +107,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
 
 	disassemble_init_for_target(&info);
 
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+	disassemble = disassembler(info.arch,
+				   bfd_big_endian(bfdf),
+				   info.mach,
+				   bfdf);
+#else
 	disassemble = disassembler(bfdf);
+#endif
 	assert(disassemble);
 
 	if (json_output)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 96982640fbf8..17f2c73fff8b 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -13,6 +13,7 @@ FILES=                                          \
          test-hello.bin                         \
          test-libaudit.bin                      \
          test-libbfd.bin                        \
+         test-disassembler-four-args.bin        \
          test-liberty.bin                       \
          test-liberty-z.bin                     \
          test-cplus-demangle.bin                \
@@ -188,6 +189,9 @@ $(OUTPUT)test-libpython-version.bin:
 $(OUTPUT)test-libbfd.bin:
 	$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
 
+$(OUTPUT)test-disassembler-four-args.bin:
+	$(BUILD) -lbfd -lopcodes
+
 $(OUTPUT)test-liberty.bin:
 	$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
 
diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c
new file mode 100644
index 000000000000..45ce65cfddf0
--- /dev/null
+++ b/tools/build/feature/test-disassembler-four-args.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+#include <dis-asm.h>
+
+int main(void)
+{
+	bfd *abfd = bfd_openr(NULL, NULL);
+
+	disassembler(bfd_get_arch(abfd),
+		     bfd_big_endian(abfd),
+		     bfd_get_mach(abfd),
+		     abfd);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 675fc275a3a2d905535207237402c6d8dcb5fa4b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 27 Dec 2017 18:39:09 -0800
Subject: bpf: offload: report device information for offloaded programs

Report to the user ifindex and namespace information of offloaded
programs.  If device has disappeared return -ENODEV.  Specify the
namespace using dev/inode combination.

CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db1b0923a308..4e8c60acfa32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
cgit v1.2.3


From 522622104ebabbc3372d2fad706b4d30cee13319 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 27 Dec 2017 18:39:10 -0800
Subject: tools: bpftool: report device information for offloaded programs

Print the just-exposed device information about device to which
program is bound.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   |  2 ++
 tools/bpf/bpftool/prog.c   |  3 +++
 3 files changed, 57 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b62c94e3997a..6601c95a9258 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -44,7 +44,9 @@
 #include <unistd.h>
 #include <linux/limits.h>
 #include <linux/magic.h>
+#include <net/if.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
 
@@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
 		free(obj);
 	}
 }
+
+static char *
+ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
+{
+	struct stat st;
+	int err;
+
+	err = stat("/proc/self/ns/net", &st);
+	if (err) {
+		p_err("Can't stat /proc/self: %s", strerror(errno));
+		return NULL;
+	}
+
+	if (st.st_dev != ns_dev || st.st_ino != ns_ino)
+		return NULL;
+
+	return if_indextoname(ifindex, buf);
+}
+
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	printf(" dev ");
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		printf("%s", name);
+	else
+		printf("ifindex %u ns_dev %llu ns_ino %llu",
+		       ifindex, ns_dev, ns_inode);
+}
+
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	jsonw_name(json_wtr, "dev");
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "ifindex", ifindex);
+	jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
+	jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		jsonw_string_field(json_wtr, "ifname", name);
+	jsonw_end_object(json_wtr);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8f6d3cac0347..65b526fe6e7e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -96,6 +96,8 @@ struct pinned_obj {
 int build_pinned_obj_table(struct pinned_obj_table *table,
 			   enum bpf_obj_type type);
 void delete_pinned_obj_table(struct pinned_obj_table *tab);
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 
 struct cmd {
 	const char *cmd;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index fd0873178503..98f871ed53d6 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
 		     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	if (info->load_time) {
 		char buf[32];
 
@@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 
 	printf("tag ");
 	fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
 	printf("\n");
 
 	if (info->load_time) {
-- 
cgit v1.2.3


From 752d7b4501c250bead233ab041738db84436b1af Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 27 Dec 2017 18:39:11 -0800
Subject: selftests/bpf: test device info reporting for bound progs

Check if bound programs report correct device info.  Test
in local namespace, in remote one, back to the local ns,
remove the device and check that information is cleared.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_offload.py | 112 +++++++++++++++++++++++++---
 1 file changed, 101 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index c940505c2978..e3c750f17cb8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -18,6 +18,8 @@ import argparse
 import json
 import os
 import pprint
+import random
+import string
 import subprocess
 import time
 
@@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
 pp = pprint.PrettyPrinter()
 devs = [] # devices we created for clean up
 files = [] # files to be removed
+netns = [] # net namespaces to be removed
 
 def log_get_sec(level=0):
     return "*" * (log_level + level)
@@ -128,22 +131,25 @@ def rm(f):
     if f in files:
         files.remove(f)
 
-def tool(name, args, flags, JSON=True, fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True):
     params = ""
     if JSON:
         params += "%s " % (flags["json"])
 
-    ret, out = cmd(name + " " + params + args, fail=fail)
+    if ns != "":
+        ns = "ip netns exec %s " % (ns)
+
+    ret, out = cmd(ns + name + " " + params + args, fail=fail)
     if JSON and len(out.strip()) != 0:
         return ret, json.loads(out)
     else:
         return ret, out
 
-def bpftool(args, JSON=True, fail=True):
-    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
-def bpftool_prog_list(expected=None):
-    _, progs = bpftool("prog show", JSON=True, fail=True)
+def bpftool_prog_list(expected=None, ns=""):
+    _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -158,13 +164,13 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
-def ip(args, force=False, JSON=True, fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True):
     if force:
         args = "-force " + args
-    return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
 
-def tc(args, JSON=True, fail=True):
-    return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def tc(args, JSON=True, ns="", fail=True):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
 def ethtool(dev, opt, args, fail=True):
     return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -178,6 +184,15 @@ def bpf_pinned(name):
 def bpf_bytecode(bytecode):
     return "bytecode \"%s\"" % (bytecode)
 
+def mknetns(n_retry=10):
+    for i in range(n_retry):
+        name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+        ret, _ = ip("netns add %s" % (name), fail=False)
+        if ret == 0:
+            netns.append(name)
+            return name
+    return None
+
 class DebugfsDir:
     """
     Class for accessing DebugFS directories as a dictionary.
@@ -237,6 +252,8 @@ class NetdevSim:
         self.dev = self._netdevsim_create()
         devs.append(self)
 
+        self.ns = ""
+
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
         self.dfs_refresh()
 
@@ -257,7 +274,7 @@ class NetdevSim:
 
     def remove(self):
         devs.remove(self)
-        ip("link del dev %s" % (self.dev["ifname"]))
+        ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
 
     def dfs_refresh(self):
         self.dfs = DebugfsDir(self.dfs_dir)
@@ -285,6 +302,11 @@ class NetdevSim:
             time.sleep(0.05)
         raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
 
+    def set_ns(self, ns):
+        name = "1" if ns == "" else ns
+        ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+        self.ns = ns
+
     def set_mtu(self, mtu, fail=True):
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
@@ -372,6 +394,8 @@ def clean_up():
         dev.remove()
     for f in files:
         cmd("rm -f %s" % (f))
+    for ns in netns:
+        cmd("ip netns delete %s" % (ns))
 
 def pin_prog(file_name, idx=0):
     progs = bpftool_prog_list(expected=(idx + 1))
@@ -381,6 +405,35 @@ def pin_prog(file_name, idx=0):
 
     return file_name, bpf_pinned(file_name)
 
+def check_dev_info(other_ns, ns, pin_file=None, removed=False):
+    if removed:
+        bpftool_prog_list(expected=0)
+        ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
+        fail(ret == 0, "Showing prog with removed device did not fail")
+        fail(err["error"].find("No such device") == -1,
+             "Showing prog with removed device expected ENODEV, error is %s" %
+             (err["error"]))
+        return
+    progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+    prog = progs[0]
+
+    fail("dev" not in prog.keys(), "Device parameters not reported")
+    dev = prog["dev"]
+    fail("ifindex" not in dev.keys(), "Device parameters not reported")
+    fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+    fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+    if not removed and not other_ns:
+        fail("ifname" not in dev.keys(), "Ifname not reported")
+        fail(dev["ifname"] != sim["ifname"],
+             "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+    else:
+        fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+        if removed:
+            fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -417,6 +470,12 @@ for s in samples:
     skip(ret != 0, "sample %s/%s not found, please compile it" %
          (bpf_test_dir, s))
 
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
 try:
     obj = bpf_obj("sample_ret0.o")
     bytecode = bpf_bytecode("1,6 0 0 4294967295,")
@@ -549,6 +608,8 @@ try:
     progs = bpftool_prog_list(expected=1)
     fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
          "Loaded program has wrong ID")
+    fail("dev" in progs[0].keys(),
+         "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
     ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
@@ -673,6 +734,35 @@ try:
     fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
          (time_diff, delay_sec))
 
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+    sim.set_xdp(obj, "offload")
+
+    start_test("Test bpftool bound info reporting (own ns)...")
+    check_dev_info(False, "")
+
+    start_test("Test bpftool bound info reporting (other ns)...")
+    ns = mknetns()
+    sim.set_ns(ns)
+    check_dev_info(True, "")
+
+    start_test("Test bpftool bound info reporting (remote ns)...")
+    check_dev_info(False, ns)
+
+    start_test("Test bpftool bound info reporting (back to own ns)...")
+    sim.set_ns("")
+    check_dev_info(False, "")
+
+    pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+    sim.remove()
+
+    start_test("Test bpftool bound info reporting (removed dev)...")
+    check_dev_info(True, "", pin_file=pin_file, removed=True)
+
     print("%s: OK" % (os.path.basename(__file__)))
 
 finally:
-- 
cgit v1.2.3


From d36f45e5b46723cf2d4147173e18c52d4143176d Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Mon, 25 Dec 2017 14:43:04 -0800
Subject: selftests/net: fix bugs in address and port initialization

Address/port initialization should work correctly regardless
of the order in which command line arguments are supplied,
E.g, cfg_port should be used to connect to the remote host
even if it is processed after -D, src/dst address initialization
should not require that [-4|-6] be specified before
the -S or -D args, receiver should be able to bind to *.<cfg_port>

Achieve this by making sure that the address/port structures
are initialized after all command line options are parsed.

Store cfg_port in host-byte order, and use htons()
to set up the sin_port/sin6_port before bind/connect,
so that the network system calls get the correct values
in network-byte order.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 3ab6ec403905..e11fe84de0fd 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -259,22 +259,28 @@ static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
 	return sizeof(*ip6h);
 }
 
-static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
 {
 	struct sockaddr_in6 *addr6 = (void *) sockaddr;
 	struct sockaddr_in *addr4 = (void *) sockaddr;
 
 	switch (domain) {
 	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
 		addr4->sin_family = AF_INET;
 		addr4->sin_port = htons(cfg_port);
-		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
 			error(1, 0, "ipv4 parse error: %s", str_addr);
 		break;
 	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
 		addr6->sin6_family = AF_INET6;
 		addr6->sin6_port = htons(cfg_port);
-		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
 			error(1, 0, "ipv6 parse error: %s", str_addr);
 		break;
 	default:
@@ -603,6 +609,7 @@ static void parse_opts(int argc, char **argv)
 				    sizeof(struct tcphdr) -
 				    40 /* max tcp options */;
 	int c;
+	char *daddr = NULL, *saddr = NULL;
 
 	cfg_payload_len = max_payload_len;
 
@@ -627,7 +634,7 @@ static void parse_opts(int argc, char **argv)
 			cfg_cpu = strtol(optarg, NULL, 0);
 			break;
 		case 'D':
-			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+			daddr = optarg;
 			break;
 		case 'i':
 			cfg_ifindex = if_nametoindex(optarg);
@@ -638,7 +645,7 @@ static void parse_opts(int argc, char **argv)
 			cfg_cork_mixed = true;
 			break;
 		case 'p':
-			cfg_port = htons(strtoul(optarg, NULL, 0));
+			cfg_port = strtoul(optarg, NULL, 0);
 			break;
 		case 'r':
 			cfg_rx = true;
@@ -647,7 +654,7 @@ static void parse_opts(int argc, char **argv)
 			cfg_payload_len = strtoul(optarg, NULL, 0);
 			break;
 		case 'S':
-			setup_sockaddr(cfg_family, optarg, &cfg_src_addr);
+			saddr = optarg;
 			break;
 		case 't':
 			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
@@ -660,6 +667,8 @@ static void parse_opts(int argc, char **argv)
 			break;
 		}
 	}
+	setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
+	setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
 
 	if (cfg_payload_len > max_payload_len)
 		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
-- 
cgit v1.2.3


From ef27e2ccde77c92245eccec97be32b5996a80efc Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 26 Dec 2017 11:10:07 -0800
Subject: selftests: rtnetlink: add erspan and ip6erspan

Add test cases for ipv4, ipv6 erspan, v1 and v2 native mode
and external (collect metadata) mode.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 131 +++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index dada4ab69142..a622eeecc3a6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -598,6 +598,135 @@ kci_test_ip6gretap()
 	ip netns del "$testns"
 }
 
+kci_test_erspan()
+{
+	testns="testns"
+	DEV_NS=erspan00
+	ret=0
+
+	ip link help erspan 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: erspan: iproute2 too old"
+		return 1
+	fi
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP erspan tests: cannot add net namespace $testns"
+		return 1
+	fi
+
+	# test native tunnel erspan v1
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 488
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test native tunnel erspan v2
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir ingress erspan_hwid 7
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: erspan"
+		return 1
+	fi
+	echo "PASS: erspan"
+
+	ip netns del "$testns"
+}
+
+kci_test_ip6erspan()
+{
+	testns="testns"
+	DEV_NS=ip6erspan00
+	ret=0
+
+	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: ip6erspan: iproute2 too old"
+		return 1
+	fi
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+		return 1
+	fi
+
+	# test native tunnel ip6erspan v1
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+		key 102 local fc00:100::1 remote fc00:100::2 \
+		erspan_ver 1 erspan 488
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test native tunnel ip6erspan v2
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+		key 102 local fc00:100::1 remote fc00:100::2 \
+		erspan_ver 2 erspan_dir ingress erspan_hwid 7
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" \
+		type ip6erspan external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ip6erspan"
+		return 1
+	fi
+	echo "PASS: ip6erspan"
+
+	ip netns del "$testns"
+}
+
 kci_test_rtnl()
 {
 	kci_add_dummy
@@ -612,6 +741,8 @@ kci_test_rtnl()
 	kci_test_gre
 	kci_test_gretap
 	kci_test_ip6gretap
+	kci_test_erspan
+	kci_test_ip6erspan
 	kci_test_bridge
 	kci_test_addrlabel
 	kci_test_ifalias
-- 
cgit v1.2.3


From 6ed361586b323e576fd5536078fe9f2ee7906e61 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 2 Jan 2018 21:37:36 +1100
Subject: selftests/powerpc: Add a test of SEGV error behaviour

Add a test case of the error code reported when we take a SEGV on a
mapped but inaccessible area. We broke this recently.

Based on a test case from John Sperbeck <jsperbeck@google.com>.

Acked-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/mm/.gitignore    |  3 +-
 tools/testing/selftests/powerpc/mm/Makefile      |  2 +-
 tools/testing/selftests/powerpc/mm/segv_errors.c | 78 ++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/segv_errors.c

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index e715a3f2fbf4..7d7c42ed6de9 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,4 +1,5 @@
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
-prot_sao
\ No newline at end of file
+prot_sao
+segv_errors
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index bf315bcbe663..8ebbe96d80a8 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
 TEST_GEN_FILES := tempfile
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644
index 000000000000..06ae76ee3ea1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/segv_errors.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	faulted = true;
+	si_code = info->si_code;
+	regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+	struct sigaction act = {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	char c, *p = NULL;
+
+	p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
+
+	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+	faulted = false;
+	si_code = 0;
+
+	/*
+	 * We just need a compiler barrier, but mb() works and has the nice
+	 * property of being easy to spot in the disassembly.
+	 */
+	mb();
+	c = *p;
+	mb();
+
+	FAIL_IF(!faulted);
+	FAIL_IF(si_code != SEGV_ACCERR);
+
+	faulted = false;
+	si_code = 0;
+
+	mb();
+	*p = c;
+	mb();
+
+	FAIL_IF(!faulted);
+	FAIL_IF(si_code != SEGV_ACCERR);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_segv_errors, "segv_errors");
+}
-- 
cgit v1.2.3


From 65b875bcc816335be41c336a595adbc10bd885cb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 2 Jan 2018 14:48:35 -0800
Subject: tools: bpftool: rename cgroup list -> show in the code

So far we have used "show" as a keyword for listing
programs and maps.  Use the word "show" in the code
for cgroups too, next commit will alias show and list.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/cgroup.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 34ca303d72bc..24091d87bee3 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -41,7 +41,7 @@ static enum bpf_attach_type parse_attach_type(const char *str)
 	return __MAX_BPF_ATTACH_TYPE;
 }
 
-static int list_bpf_prog(int id, const char *attach_type_str,
+static int show_bpf_prog(int id, const char *attach_type_str,
 			 const char *attach_flags_str)
 {
 	struct bpf_prog_info info = {};
@@ -77,7 +77,7 @@ static int list_bpf_prog(int id, const char *attach_type_str,
 	return 0;
 }
 
-static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
 {
 	__u32 prog_ids[1024] = {0};
 	char *attach_flags_str;
@@ -111,23 +111,23 @@ static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
 	}
 
 	for (iter = 0; iter < prog_cnt; iter++)
-		list_bpf_prog(prog_ids[iter], attach_type_strings[type],
+		show_bpf_prog(prog_ids[iter], attach_type_strings[type],
 			      attach_flags_str);
 
 	return 0;
 }
 
-static int do_list(int argc, char **argv)
+static int do_show(int argc, char **argv)
 {
 	enum bpf_attach_type type;
 	int cgroup_fd;
 	int ret = -1;
 
 	if (argc < 1) {
-		p_err("too few parameters for cgroup list\n");
+		p_err("too few parameters for cgroup show\n");
 		goto exit;
 	} else if (argc > 1) {
-		p_err("too many parameters for cgroup list\n");
+		p_err("too many parameters for cgroup show\n");
 		goto exit;
 	}
 
@@ -147,10 +147,10 @@ static int do_list(int argc, char **argv)
 		/*
 		 * Not all attach types may be supported, so it's expected,
 		 * that some requests will fail.
-		 * If we were able to get the list for at least one
+		 * If we were able to get the show for at least one
 		 * attach type, let's return 0.
 		 */
-		if (list_attached_bpf_progs(cgroup_fd, type) == 0)
+		if (show_attached_bpf_progs(cgroup_fd, type) == 0)
 			ret = 0;
 	}
 
@@ -294,7 +294,7 @@ static int do_help(int argc, char **argv)
 }
 
 static const struct cmd cmds[] = {
-	{ "list",	do_list },
+	{ "list",	do_show },
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
 	{ "help",	do_help },
-- 
cgit v1.2.3


From 6ebe6dbd6886af07b102aca42e44edbee94a22d9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 2 Jan 2018 14:48:36 -0800
Subject: tools: bpftool: alias show and list commands

iproute2 seems to accept show and list as aliases.
Let's do the same thing, and by allowing both bring
cgroup syntax back in line with maps and progs.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 6 +++---
 tools/bpf/bpftool/Documentation/bpftool-map.rst    | 6 +++---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 6 +++---
 tools/bpf/bpftool/Documentation/bpftool.rst        | 6 +++---
 tools/bpf/bpftool/bash-completion/bpftool          | 8 ++++----
 tools/bpf/bpftool/cgroup.c                         | 3 ++-
 tools/bpf/bpftool/map.c                            | 3 ++-
 tools/bpf/bpftool/prog.c                           | 3 ++-
 8 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 45c71b1f682b..2fe2a1bdbe3e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -15,12 +15,12 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **list** | **attach** | **detach** | **help** }
+	{ **show** | **list** | **attach** | **detach** | **help** }
 
 MAP COMMANDS
 =============
 
-|	**bpftool** **cgroup list** *CGROUP*
+|	**bpftool** **cgroup { show | list }** *CGROUP*
 |	**bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
 |	**bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 |	**bpftool** **cgroup help**
@@ -31,7 +31,7 @@ MAP COMMANDS
 
 DESCRIPTION
 ===========
-	**bpftool cgroup list** *CGROUP*
+	**bpftool cgroup { show | list }** *CGROUP*
 		  List all programs attached to the cgroup *CGROUP*.
 
 		  Output will start with program ID followed by attach type,
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 421cabc417e6..0ab32b312aec 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,13 +15,13 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **dump** | **update** | **lookup** | **getnext** | **delete**
+	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
 	| **pin** | **help** }
 
 MAP COMMANDS
 =============
 
-|	**bpftool** **map show**   [*MAP*]
+|	**bpftool** **map { show | list }**   [*MAP*]
 |	**bpftool** **map dump**    *MAP*
 |	**bpftool** **map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
 |	**bpftool** **map lookup**  *MAP*  **key** *BYTES*
@@ -36,7 +36,7 @@ MAP COMMANDS
 
 DESCRIPTION
 ===========
-	**bpftool map show**   [*MAP*]
+	**bpftool map { show | list }**   [*MAP*]
 		  Show information about loaded maps.  If *MAP* is specified
 		  show information only about given map, otherwise list all
 		  maps currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 81c97c0e9b67..e4ceee7f2dff 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,12 +15,12 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
+	{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
 
 MAP COMMANDS
 =============
 
-|	**bpftool** **prog show** [*PROG*]
+|	**bpftool** **prog { show | list }** [*PROG*]
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
@@ -31,7 +31,7 @@ MAP COMMANDS
 
 DESCRIPTION
 ===========
-	**bpftool prog show** [*PROG*]
+	**bpftool prog { show | list }** [*PROG*]
 		  Show information about loaded programs.  If *PROG* is
 		  specified show information only about given program, otherwise
 		  list all programs currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 6732a5a617e4..20689a321ffe 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -22,13 +22,13 @@ SYNOPSIS
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
 	*MAP-COMMANDS* :=
-	{ **show** | **dump** | **update** | **lookup** | **getnext** | **delete**
+	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
 	| **pin** | **help** }
 
-	*PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin**
+	*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
 	| **load** | **help** }
 
-	*CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** }
+	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7febee05c8e7..0137866bb8f6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -197,7 +197,7 @@ _bpftool()
 
             local PROG_TYPE='id pinned tag'
             case $command in
-                show)
+                show|list)
                     [[ $prev != "$command" ]] && return 0
                     COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
                     return 0
@@ -232,7 +232,7 @@ _bpftool()
                     ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin show' -- \
+                        COMPREPLY=( $( compgen -W 'dump help pin show list' -- \
                             "$cur" ) )
                     ;;
             esac
@@ -240,7 +240,7 @@ _bpftool()
         map)
             local MAP_TYPE='id pinned'
             case $command in
-                show|dump)
+                show|list|dump)
                     case $prev in
                         $command)
                             COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
@@ -343,7 +343,7 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
-                            lookup pin show update' -- "$cur" ) )
+                            lookup pin show list update' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 24091d87bee3..35f5f003df28 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -277,7 +277,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s list CGROUP\n"
+		"Usage: %s %s { show | list } CGROUP\n"
 		"       %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
 		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
 		"       %s %s help\n"
@@ -294,6 +294,7 @@ static int do_help(int argc, char **argv)
 }
 
 static const struct cmd cmds[] = {
+	{ "show",	do_show },
 	{ "list",	do_show },
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a8c3a33dd185..8d7db9d6b9cd 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -861,7 +861,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s show   [MAP]\n"
+		"Usage: %s %s { show | list }   [MAP]\n"
 		"       %s %s dump    MAP\n"
 		"       %s %s update  MAP  key BYTES value VALUE [UPDATE_FLAGS]\n"
 		"       %s %s lookup  MAP  key BYTES\n"
@@ -885,6 +885,7 @@ static int do_help(int argc, char **argv)
 
 static const struct cmd cmds[] = {
 	{ "show",	do_show },
+	{ "list",	do_show },
 	{ "help",	do_help },
 	{ "dump",	do_dump },
 	{ "update",	do_update },
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 98f871ed53d6..5577960bffe4 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -836,7 +836,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s show [PROG]\n"
+		"Usage: %s %s { show | list } [PROG]\n"
 		"       %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
 		"       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
 		"       %s %s pin   PROG FILE\n"
@@ -854,6 +854,7 @@ static int do_help(int argc, char **argv)
 
 static const struct cmd cmds[] = {
 	{ "show",	do_show },
+	{ "list",	do_show },
 	{ "help",	do_help },
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
-- 
cgit v1.2.3


From b4fac96d9a672def4e24c1858adb7e9351471815 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 2 Jan 2018 14:48:37 -0800
Subject: tools: bpftool: remove new lines from errors

It's a little bit unusual for kernel style, but we add the new line
character to error strings inside the p_err() function.  We do this
because new lines at the end of error strings will break JSON output.

Fix a few p_err("..\n") which snuck in recently.

Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/cgroup.c | 18 +++++++++---------
 tools/bpf/bpftool/prog.c   |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 35f5f003df28..cae32a61cb18 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -124,16 +124,16 @@ static int do_show(int argc, char **argv)
 	int ret = -1;
 
 	if (argc < 1) {
-		p_err("too few parameters for cgroup show\n");
+		p_err("too few parameters for cgroup show");
 		goto exit;
 	} else if (argc > 1) {
-		p_err("too many parameters for cgroup show\n");
+		p_err("too many parameters for cgroup show");
 		goto exit;
 	}
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s\n", argv[1]);
+		p_err("can't open cgroup %s", argv[1]);
 		goto exit;
 	}
 
@@ -171,19 +171,19 @@ static int do_attach(int argc, char **argv)
 	int i;
 
 	if (argc < 4) {
-		p_err("too few parameters for cgroup attach\n");
+		p_err("too few parameters for cgroup attach");
 		goto exit;
 	}
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s\n", argv[1]);
+		p_err("can't open cgroup %s", argv[1]);
 		goto exit;
 	}
 
 	attach_type = parse_attach_type(argv[1]);
 	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
-		p_err("invalid attach type\n");
+		p_err("invalid attach type");
 		goto exit_cgroup;
 	}
 
@@ -199,7 +199,7 @@ static int do_attach(int argc, char **argv)
 		} else if (is_prefix(argv[i], "override")) {
 			attach_flags |= BPF_F_ALLOW_OVERRIDE;
 		} else {
-			p_err("unknown option: %s\n", argv[i]);
+			p_err("unknown option: %s", argv[i]);
 			goto exit_cgroup;
 		}
 	}
@@ -229,13 +229,13 @@ static int do_detach(int argc, char **argv)
 	int ret = -1;
 
 	if (argc < 4) {
-		p_err("too few parameters for cgroup detach\n");
+		p_err("too few parameters for cgroup detach");
 		goto exit;
 	}
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s\n", argv[1]);
+		p_err("can't open cgroup %s", argv[1]);
 		goto exit;
 	}
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5577960bffe4..c6a28be4665c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -813,12 +813,12 @@ static int do_load(int argc, char **argv)
 		usage();
 
 	if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
-		p_err("failed to load program\n");
+		p_err("failed to load program");
 		return -1;
 	}
 
 	if (do_pin_fd(prog_fd, argv[1])) {
-		p_err("failed to pin program\n");
+		p_err("failed to pin program");
 		return -1;
 	}
 
-- 
cgit v1.2.3


From 5c65a2fdb36b21c3f6ac534c5e12f81cf67726b1 Mon Sep 17 00:00:00 2001
From: Elad Wexler <elad.wexler@gmail.com>
Date: Sat, 30 Dec 2017 18:11:51 +0200
Subject: tools: usb: usbip_device_driver: prefer 'unsigned int' to 'unsigned'

Fixup a coding style issue

Signed-off-by: Elad Wexler <elad.wexler@gmail.com>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/libsrc/usbip_device_driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c
index e059b7d1ec5b..b2dac82975af 100644
--- a/tools/usb/usbip/libsrc/usbip_device_driver.c
+++ b/tools/usb/usbip/libsrc/usbip_device_driver.c
@@ -77,7 +77,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
 	const char *path, *name;
 	char filepath[SYSFS_PATH_MAX];
 	struct usb_device_descriptor descr;
-	unsigned i;
+	unsigned int i;
 	FILE *fd = NULL;
 	struct udev_device *plat;
 	const char *speed;
-- 
cgit v1.2.3


From f7a5d7b3ab3c4865af85b431e1cd18b66b6c3b0e Mon Sep 17 00:00:00 2001
From: Elad Wexler <elad.wexler@gmail.com>
Date: Sat, 30 Dec 2017 18:01:39 +0200
Subject: tools: usb: usbip: fix fd leak in case of 'fread' failure

Fix possible resource leak: fd

Signed-off-by: Elad Wexler <elad.wexler@gmail.com>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/libsrc/usbip_device_driver.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c
index b2dac82975af..ec3a0b794f15 100644
--- a/tools/usb/usbip/libsrc/usbip_device_driver.c
+++ b/tools/usb/usbip/libsrc/usbip_device_driver.c
@@ -92,7 +92,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
 		return -1;
 	ret = fread((char *) &descr, sizeof(descr), 1, fd);
 	if (ret < 0)
-		return -1;
+		goto err;
 	fclose(fd);
 
 	copy_descr_attr(dev, &descr, bDeviceClass);
@@ -124,6 +124,9 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
 	name = udev_device_get_sysname(plat);
 	strncpy(dev->busid, name, SYSFS_BUS_ID_SIZE);
 	return 0;
+err:
+	fclose(fd);
+	return -1;
 }
 
 static int is_my_device(struct udev_device *dev)
-- 
cgit v1.2.3


From 3ced9b600234e6bb13cb25654ce44c4896485cf2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 4 Jan 2018 13:55:04 -0800
Subject: tools/bpf: add a bpf selftest for stacktrace

Added a bpf selftest in test_progs at tools directory for stacktrace.
The test will populate a hashtable map and a stacktrace map
at the same time with the same key, stackid.
The user space will compare both maps, using BPF_MAP_LOOKUP_ELEM
command and BPF_MAP_GET_NEXT_KEY command, to ensure that both have
the same set of keys.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile              |   2 +-
 tools/testing/selftests/bpf/test_progs.c          | 127 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_stacktrace_map.c |  62 +++++++++++
 3 files changed, 190 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_stacktrace_map.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1304753d29ea..a8aa7e251c8e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o test_xdp_noinline.o
+	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 09087ab12293..b549308abd19 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -837,6 +837,132 @@ static void test_tp_attach_query(void)
 	free(query);
 }
 
+static int compare_map_keys(int map1_fd, int map2_fd)
+{
+	__u32 key, next_key;
+	char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+	int err;
+
+	err = bpf_map_get_next_key(map1_fd, NULL, &key);
+	if (err)
+		return err;
+	err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
+	if (err)
+		return err;
+
+	while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
+		err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
+		if (err)
+			return err;
+
+		key = next_key;
+	}
+	if (errno != ENOENT)
+		return -1;
+
+	return 0;
+}
+
+static void test_stacktrace_map()
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	const char *file = "./test_stacktrace_map.o";
+	int bytes, efd, err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {};
+	__u32 key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		goto out;
+
+	/* Get the ID for the sched/sched_switch tracepoint */
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+		  "read", "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	/* Open the perf event and attach bpf progrram */
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		; /* fall through */
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+
+out:
+	return;
+}
+
 int main(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -852,6 +978,7 @@ int main(void)
 	test_pkt_md_access();
 	test_obj_name();
 	test_tp_attach_query();
+	test_stacktrace_map();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000000..76d85c5d08bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 10000,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+	unsigned long long pad;
+	char prev_comm[16];
+	int prev_pid;
+	int prev_prio;
+	long long prev_state;
+	char next_comm[16];
+	int next_pid;
+	int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	__u32 key = 0, val = 0, *value_p;
+
+	value_p = bpf_map_lookup_elem(&control_map, &key);
+	if (value_p && *value_p)
+		return 0; /* skip if non-zero *value_p */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(ctx, &stackmap, 0);
+	if ((int)key >= 0)
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
-- 
cgit v1.2.3


From 607bd2e502f5528e1aef94e4d6f8252ccc4dbd6a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 7 Jan 2018 12:45:16 +0200
Subject: selftests: fib_tests: Add test cases for IPv4/IPv6 FIB

Add test cases to check that IPv4 and IPv6 react to a netdev being
unregistered as expected.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile     |   1 +
 tools/testing/selftests/net/fib_tests.sh | 146 +++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100755 tools/testing/selftests/net/fib_tests.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 500c74db746c..d7c30d366935 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,6 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS += fib_tests.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
new file mode 100755
index 000000000000..767d2ab2385d
--- /dev/null
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB behavior in response to
+# different events.
+
+ret=0
+
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+check_fail()
+{
+	if [ $1 -eq 0 ]; then
+		ret=1
+	fi
+}
+
+netns_create()
+{
+	local testns=$1
+
+	ip netns add $testns
+	ip netns exec $testns ip link set dev lo up
+}
+
+fib_unreg_unicast_test()
+{
+	ret=0
+
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip link del dev dummy0
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_fail $?
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: unicast route test"
+		return 1
+	fi
+	echo "PASS: unicast route test"
+}
+
+fib_unreg_multipath_test()
+{
+	ret=0
+
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip link add dummy1 type dummy
+	ip netns exec testns ip link set dev dummy1 up
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
+	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
+
+	ip netns exec testns ip route add 203.0.113.0/24 \
+		nexthop via 198.51.100.2 dev dummy0 \
+		nexthop via 192.0.2.2 dev dummy1
+	ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev dummy0 \
+		nexthop via 2001:db8:2::2 dev dummy1
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip link del dev dummy0
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	# In IPv6 we do not flush the entire multipath route.
+	check_err $?
+
+	ip netns exec testns ip link del dev dummy1
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: multipath route test"
+		return 1
+	fi
+	echo "PASS: multipath route test"
+}
+
+fib_unreg_test()
+{
+	echo "Running netdev unregister tests"
+
+	fib_unreg_unicast_test
+	fib_unreg_multipath_test
+}
+
+fib_test()
+{
+	fib_unreg_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+ip route help 2>&1 | grep -q fibmatch
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing fibmatch"
+	exit 0
+fi
+
+fib_test
+
+exit $ret
-- 
cgit v1.2.3


From 5adb7683b48e35bc863a37e026dc6038ded30560 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 7 Jan 2018 12:45:17 +0200
Subject: selftests: fib_tests: Add test cases for netdev down

Check that IPv4 and IPv6 react the same when a netdev is being put
administratively down.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 141 +++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 767d2ab2385d..25ba74f8a37e 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -120,9 +120,150 @@ fib_unreg_test()
 	fib_unreg_multipath_test
 }
 
+fib_down_unicast_test()
+{
+	ret=0
+
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip link set dev dummy0 down
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_fail $?
+
+	ip netns exec testns ip link del dev dummy0
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: unicast route test"
+		return 1
+	fi
+	echo "PASS: unicast route test"
+}
+
+fib_down_multipath_test_do()
+{
+	local down_dev=$1
+	local up_dev=$2
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 \
+		oif $down_dev &> /dev/null
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+		oif $down_dev &> /dev/null
+	check_fail $?
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 \
+		oif $up_dev &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+		oif $up_dev &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+		grep $down_dev | grep -q "dead linkdown"
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+		grep $down_dev | grep -q "dead linkdown"
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+		grep $up_dev | grep -q "dead linkdown"
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+		grep $up_dev | grep -q "dead linkdown"
+	check_fail $?
+}
+
+fib_down_multipath_test()
+{
+	ret=0
+
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip link add dummy1 type dummy
+	ip netns exec testns ip link set dev dummy1 up
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
+	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
+
+	ip netns exec testns ip route add 203.0.113.0/24 \
+		nexthop via 198.51.100.2 dev dummy0 \
+		nexthop via 192.0.2.2 dev dummy1
+	ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev dummy0 \
+		nexthop via 2001:db8:2::2 dev dummy1
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip link set dev dummy0 down
+	check_err $?
+
+	fib_down_multipath_test_do "dummy0" "dummy1"
+
+	ip netns exec testns ip link set dev dummy0 up
+	check_err $?
+	ip netns exec testns ip link set dev dummy1 down
+	check_err $?
+
+	fib_down_multipath_test_do "dummy1" "dummy0"
+
+	ip netns exec testns ip link set dev dummy0 down
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	check_fail $?
+
+	ip netns exec testns ip link del dev dummy1
+	ip netns exec testns ip link del dev dummy0
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: multipath route test"
+		return 1
+	fi
+	echo "PASS: multipath route test"
+}
+
+fib_down_test()
+{
+	echo "Running netdev down tests"
+
+	fib_down_unicast_test
+	fib_down_multipath_test
+}
+
 fib_test()
 {
 	fib_unreg_test
+	fib_down_test
 }
 
 if [ "$(id -u)" -ne 0 ];then
-- 
cgit v1.2.3


From 82e45b6fd29246f36ff8064e74d412c11feaab23 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 7 Jan 2018 12:45:18 +0200
Subject: selftests: fib_tests: Add test cases for netdev carrier change

Check that IPv4 and IPv6 react the same when the carrier of a netdev is
toggled. Local routes should not be affected by this, whereas unicast
routes should.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 142 +++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 25ba74f8a37e..a9154eefb2e2 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -260,10 +260,152 @@ fib_down_test()
 	fib_down_multipath_test
 }
 
+fib_carrier_local_test()
+{
+	ret=0
+
+	# Local routes should not be affected when carrier changes.
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip link set dev dummy0 carrier on
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+		grep -q "linkdown"
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+		grep -q "linkdown"
+	check_fail $?
+
+	ip netns exec testns ip link set dev dummy0 carrier off
+
+	ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+		grep -q "linkdown"
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+		grep -q "linkdown"
+	check_fail $?
+
+	ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+		grep -q "linkdown"
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+		grep -q "linkdown"
+	check_fail $?
+
+	ip netns exec testns ip link del dev dummy0
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: local route carrier test"
+		return 1
+	fi
+	echo "PASS: local route carrier test"
+}
+
+fib_carrier_unicast_test()
+{
+	ret=0
+
+	netns_create "testns"
+
+	ip netns exec testns ip link add dummy0 type dummy
+	ip netns exec testns ip link set dev dummy0 up
+
+	ip netns exec testns ip link set dev dummy0 carrier on
+
+	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+		grep -q "linkdown"
+	check_fail $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+		grep -q "linkdown"
+	check_fail $?
+
+	ip netns exec testns ip link set dev dummy0 carrier off
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+		grep -q "linkdown"
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+		grep -q "linkdown"
+	check_err $?
+
+	ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
+	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+
+	ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+	check_err $?
+
+	ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+		grep -q "linkdown"
+	check_err $?
+	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+		grep -q "linkdown"
+	check_err $?
+
+	ip netns exec testns ip link del dev dummy0
+
+	ip netns del testns
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: unicast route carrier test"
+		return 1
+	fi
+	echo "PASS: unicast route carrier test"
+}
+
+fib_carrier_test()
+{
+	echo "Running netdev carrier change tests"
+
+	fib_carrier_local_test
+	fib_carrier_unicast_test
+}
+
 fib_test()
 {
 	fib_unreg_test
 	fib_down_test
+	fib_carrier_test
 }
 
 if [ "$(id -u)" -ne 0 ];then
-- 
cgit v1.2.3


From e8d5134833006a46fcbefc5f4a84d0b62bd520e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 29 Dec 2017 08:54:05 +0100
Subject: memremap: change devm_memremap_pages interface to use struct
 dev_pagemap

This new interface is similar to how struct device (and many others)
work. The caller initializes a 'struct dev_pagemap' as required
and calls 'devm_memremap_pages'. This allows the pagemap structure to
be embedded in another structure and thus container_of can be used. In
this way application specific members can be stored in a containing
struct.

This will be used by the P2P infrastructure and HMM could probably
be cleaned up to use it as well (instead of having it's own, similar
'hmm_devmem_pages_create' function).

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/iomap.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e1f75a1914a1..ff9d3a5825e1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
-		struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
-	resource_size_t offset = res->start;
+	resource_size_t offset = pgmap->res.start;
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
 	if (nfit_res)
 		return nfit_res->buf + offset - nfit_res->res.start;
-	return devm_memremap_pages(dev, res, ref, altmap);
+	return devm_memremap_pages(dev, pgmap);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-- 
cgit v1.2.3


From 912ec316686df352028afb6efec59e47a958a24d Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Fri, 5 Jan 2018 17:31:18 +0100
Subject: selftests: seccomp: fix compile error seccomp_bpf

aarch64-linux-gnu-gcc -Wl,-no-as-needed -Wall
    -lpthread seccomp_bpf.c -o seccomp_bpf
seccomp_bpf.c: In function 'tracer_ptrace':
seccomp_bpf.c:1720:12: error: '__NR_open' undeclared
    (first use in this function)
  if (nr == __NR_open)
            ^~~~~~~~~
seccomp_bpf.c:1720:12: note: each undeclared identifier is reported
    only once for each function it appears in
In file included from seccomp_bpf.c:48:0:
seccomp_bpf.c: In function 'TRACE_syscall_ptrace_syscall_dropped':
seccomp_bpf.c:1795:39: error: '__NR_open' undeclared
    (first use in this function)
  EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
                                       ^
open(2) is a legacy syscall, replaced with openat(2) since 2.6.16.
Thus new architectures in the kernel, such as arm64, don't implement
these legacy syscalls.

Fixes: a33b2d0359a0 ("selftests/seccomp: Add tests for basic ptrace
actions")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: stable@vger.kernel.org
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 24dbf634e2dd..0b457e8e0f0c 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1717,7 +1717,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 
 	if (nr == __NR_getpid)
 		change_syscall(_metadata, tracee, __NR_getppid);
-	if (nr == __NR_open)
+	if (nr == __NR_openat)
 		change_syscall(_metadata, tracee, -1);
 }
 
@@ -1792,7 +1792,7 @@ TEST_F(TRACE_syscall, ptrace_syscall_dropped)
 					   true);
 
 	/* Tracer should skip the open syscall, resulting in EPERM. */
-	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat));
 }
 
 TEST_F(TRACE_syscall, syscall_allowed)
-- 
cgit v1.2.3


From 4c1baad223906943b595a887305f2e8124821dad Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 9 Jan 2018 17:26:24 +0100
Subject: kselftest: fix OOM in memory compaction test

Running the compaction_test sometimes results in out-of-memory
failures. When I debugged this, it turned out that the code to
reset the number of hugepages to the initial value is simply
broken since we write into an open sysctl file descriptor
multiple times without seeking back to the start.

Adding the lseek here fixes the problem.

Cc: stable@vger.kernel.org
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: https://bugs.linaro.org/show_bug.cgi?id=3145
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/vm/compaction_test.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index a65b016d4c13..1097f04e4d80 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -137,6 +137,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
 	printf("No of huge pages allocated = %d\n",
 	       (atoi(nr_hugepages)));
 
+	lseek(fd, 0, SEEK_SET);
+
 	if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
 	    != strlen(initial_nr_hugepages)) {
 		perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
-- 
cgit v1.2.3


From 2519c35fac12f7e17ed79b1e01aa6edc161823af Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@kernel.org>
Date: Thu, 11 Jan 2018 13:46:07 +0000
Subject: tools/testing: Fix trailing semicolon

The trailing semicolon is an empty statement that does no operation.
Removing it since it doesn't do anything.

Signed-off-by: Luis de Bethencourt <luisbg@kernel.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/nsfs/pidns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
index 1182d4e437a2..e0d86e1668c0 100644
--- a/tools/testing/selftests/nsfs/pidns.c
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
 			return pr_err("NS_GET_PARENT returned a wrong namespace");
 
 		if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
-			return pr_err("Don't get EPERM");;
+			return pr_err("Don't get EPERM");
 	}
 
 	kill(pid, SIGKILL);
-- 
cgit v1.2.3


From 170b555777cf1329419b96f952e2c320c93ecc40 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 11 Jan 2018 10:28:35 -0700
Subject: selftests: media_tests: Fix Makefile 'clean' target warning

Remove 'clean' target and change TEST_PROGS to TEST_GEN_PROGS so the
common lib.mk 'clean' target clean these generated files. TEST_PROGS
is for shell scripts and not for generated test executables.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/media_tests/Makefile | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index be5bd4ffb895..c82cec2497de 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := media_device_test media_device_open video_device_test
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := media_device_test media_device_open video_device_test
+all: $(TEST_GEN_PROGS)
 
 include ../lib.mk
-
-clean:
-	rm -fr media_device_test media_device_open video_device_test
-- 
cgit v1.2.3


From 6ee7bcc46ea796e158c628dfff9836420281fc6a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 9 Jan 2018 17:08:10 -0700
Subject: selftests: gen_kselftest_tar.h: Add SPDX license identifier

Replace GPL license statement with SPDX GPL-2.0 license identifier.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/gen_kselftest_tar.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index 17d5bd0c0936..a27e2eec3586 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -1,13 +1,11 @@
 #!/bin/bash
 #
+# SPDX-License-Identifier: GPL-2.0
 # gen_kselftest_tar
 # Generate kselftest tarball
 # Author: Shuah Khan <shuahkh@osg.samsung.com>
 # Copyright (C) 2015 Samsung Electronics Co., Ltd.
 
-# This software may be freely redistributed under the terms of the GNU
-# General Public License (GPLv2).
-
 # main
 main()
 {
-- 
cgit v1.2.3


From 4b461dc238cc088417efba23e4e9c67bd5dcca74 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 9 Jan 2018 18:36:55 -0700
Subject: selftests: kselftest_install.sh: Add SPDX license identifier

Replace GPL license statement with SPDX GPL-2.0 license identifier.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/kselftest_install.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
index 1555fbdb08da..ec304463883c 100755
--- a/tools/testing/selftests/kselftest_install.sh
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -1,13 +1,11 @@
 #!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
 #
 # Kselftest Install
 # Install kselftest tests
 # Author: Shuah Khan <shuahkh@osg.samsung.com>
 # Copyright (C) 2015 Samsung Electronics Co., Ltd.
 
-# This software may be freely redistributed under the terms of the GNU
-# General Public License (GPLv2).
-
 install_loc=`pwd`
 
 main()
-- 
cgit v1.2.3


From 7c466b97ccc6935c0a668cfefcda874c7560c3dc Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 9 Jan 2018 19:22:31 -0700
Subject: selftests: kselftest.h: Add SPDX license identifier

Replace GPL license statement with SPDX GPL-2.0 license identifier.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/kselftest.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1ae565ed9bf0..1a52b03962a3 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * kselftest.h:	kselftest framework return codes to include from
  *		selftests.
@@ -5,7 +6,6 @@
  * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  *
- * This file is released under the GPLv2.
  */
 #ifndef __KSELFTEST_H
 #define __KSELFTEST_H
-- 
cgit v1.2.3


From a4f0e38e1d0d87849e22cb2f9c3f0bbe87729f5d Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 9 Jan 2018 18:38:55 -0700
Subject: selftests: media_tests: Add SPDX license identifier

Replace GPL license statement with SPDX GPL-2.0 license identifier.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/media_tests/media_device_open.c | 3 ++-
 tools/testing/selftests/media_tests/media_device_test.c | 3 ++-
 tools/testing/selftests/media_tests/video_device_test.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index 44343c091a20..a5ce5434bafd 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
  * media_device_open.c - Media Controller Device Open Test
  *
  * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  *
- * This file is released under the GPLv2.
  */
 
 /*
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 5d49943e77d0..421a367e4bb3 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
  * media_device_test.c - Media Controller Device ioctl loop Test
  *
  * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  *
- * This file is released under the GPLv2.
  */
 
 /*
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
index 66d419c28653..0f6aef2e2593 100644
--- a/tools/testing/selftests/media_tests/video_device_test.c
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
 /*
  * video_device_test - Video Device Test
  *
  * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  *
- * This file is released under the GPLv2.
  */
 
 /*
-- 
cgit v1.2.3


From a38845729ea3985db5d2544ec3ef3dc8f6313a27 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 11 Jan 2018 20:29:09 -0800
Subject: bpf: offload: add map offload infrastructure

BPF map offload follow similar path to program offload.  At creation
time users may specify ifindex of the device on which they want to
create the map.  Map will be validated by the kernel's
.map_alloc_check callback and device driver will be called for the
actual allocation.  Map will have an empty set of operations
associated with it (save for alloc and free callbacks).  The real
device callbacks are kept in map->offload->dev_ops because they
have slightly different signatures.  Map operations are called in
process context so the driver may communicate with HW freely,
msleep(), wait() etc.

Map alloc and free callbacks are muxed via existing .ndo_bpf, and
are always called with rtnl lock held.  Maps and programs are
guaranteed to be destroyed before .ndo_uninit (i.e. before
unregister_netdev() returns).  Map callbacks are invoked with
bpf_devs_lock *read* locked, drivers must take care of exclusive
locking if necessary.

All offload-specific branches are marked with unlikely() (through
bpf_map_is_dev_bound()), given that branch penalty will be
negligible compared to IO anyway, and we don't want to penalize
SW path unnecessarily.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4e8c60acfa32..69f96af4a569 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -245,6 +245,7 @@ union bpf_attr {
 					 * BPF_F_NUMA_NODE is set).
 					 */
 		char	map_name[BPF_OBJ_NAME_LEN];
+		__u32	map_ifindex;	/* ifindex of netdev to create on */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
-- 
cgit v1.2.3


From b394d468e7d75637e682a9be4a1181b27186c593 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 10 Jan 2018 14:22:38 -0800
Subject: usercopy: Enhance and rename report_usercopy()

In preparation for refactoring the usercopy checks to pass offset to
the hardened usercopy report, this renames report_usercopy() to the
more accurate usercopy_abort(), marks it as noreturn because it is,
adds a hopefully helpful comment for anyone investigating such reports,
makes the function available to the slab allocators, and adds new "detail"
and "offset" arguments.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/objtool/check.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9b341584eb1b..ae39444896d4 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"__reiserfs_panic",
 		"lbug_with_loc",
 		"fortify_panic",
+		"usercopy_abort",
 	};
 
 	if (func->bind == STB_WEAK)
-- 
cgit v1.2.3


From 45e5e1212af4633aa76db387ccaac8b41c8a7b6c Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 15 Jan 2018 19:16:15 +0000
Subject: bpftool: recognize BPF_PROG_TYPE_CGROUP_DEVICE programs

Bpftool doesn't recognize BPF_PROG_TYPE_CGROUP_DEVICE programs,
so the prog show command prints the numeric type value:

$ bpftool prog show
1: type 15  name bpf_prog1  tag ac9f93dbfd6d9b74
	loaded_at Jan 15/07:58  uid 0
	xlated 96B  jited 105B  memlock 4096B

This patch defines the corresponding textual representation:

$ bpftool prog show
1: cgroup_device  name bpf_prog1  tag ac9f93dbfd6d9b74
	loaded_at Jan 15/07:58  uid 0
	xlated 96B  jited 105B  memlock 4096B

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c6a28be4665c..099e21cf1b5c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -66,6 +66,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_LWT_XMIT]	= "lwt_xmit",
 	[BPF_PROG_TYPE_SOCK_OPS]	= "sock_ops",
 	[BPF_PROG_TYPE_SK_SKB]		= "sk_skb",
+	[BPF_PROG_TYPE_CGROUP_DEVICE]	= "cgroup_device",
 };
 
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
-- 
cgit v1.2.3


From 5e46664703b364434a2cbda3e6988fc24ae0ced5 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sun, 14 Jan 2018 22:50:07 +0900
Subject: selftest: ftrace: Fix to pick text symbols for kprobes

Fix to pick text symbols for multiple kprobe testcase.
kallsyms shows text symbols with " t " or " T " but
current testcase picks all symbols including "t",
so it picks data symbols if it includes 't' (e.g. "str").

This fixes it to find symbol lines with " t " or " T "
(including spaces).

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reported-by: Russell King <linux@armlinux.org.uk>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index bb16cf91f1b5..e297bd7a2e79 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -12,8 +12,8 @@ case `uname -m` in
   *) OFFS=0;;
 esac
 
-echo "Setup up to 256 kprobes"
-grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+echo "Setup up kprobes on first 256 text symbols"
+grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
 head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||:
 
 echo 1 > events/kprobes/enable
-- 
cgit v1.2.3


From 9739cee6918b222af6f75869ed33127a8b1c3d77 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sun, 14 Jan 2018 22:50:36 +0900
Subject: selftest: ftrace: Fix to add 256 kprobe events correctly

Current multiple-kprobe testcase only tries to add
kprobe events on first 256 text symbols. However
kprobes fails to probe on some text symbols (like
blacklisted symbols). Thus in the worst case,
the test can not add any kprobe events.

To avoid that, continue to try adding kprobe events
until 256 events. Also it confirms the number of
registered kprobe events.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 .../ftrace/test.d/kprobe/multiple_kprobes.tc          | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index e297bd7a2e79..ce361b9d62cf 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -12,9 +12,24 @@ case `uname -m` in
   *) OFFS=0;;
 esac
 
-echo "Setup up kprobes on first 256 text symbols"
+if [ -d events/kprobes ]; then
+  echo 0 > events/kprobes/enable
+  echo > kprobe_events
+fi
+
+N=0
+echo "Setup up kprobes on first available 256 text symbols"
 grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
-head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||:
+while read i; do
+  echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||:
+  test $N -eq 256 && break
+done
+
+L=`wc -l kprobe_events`
+if [ $L -ne $N ]; then
+  echo "The number of kprobes events ($L) is not $N"
+  exit_fail
+fi
 
 echo 1 > events/kprobes/enable
 echo 0 > events/kprobes/enable
-- 
cgit v1.2.3


From 14f1889fd4d7ea1f496e90ae055ecc086575a8cd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 16 Jan 2018 17:10:39 +1100
Subject: selftests: Fix loss of test output in run_kselftests.sh

Commit fbcab13d2e25 ("selftests: silence test output by default")
changed the run_tests logic as well as the logic to generate
run_kselftests.sh to redirect test output away from the console.

As discussed on the list and at kernel summit, this is not a desirable
default as it means in order to debug a failure the console output is
not sufficient, you also need access to the test machine to get the
full test logs. Additionally it's impolite to write directly to
/tmp/$TEST_NAME on shared systems.

The change to the run_tests logic was reverted in commit
a323335e62cc ("selftests: lib.mk: print individual test results to
console by default"), and instead a summary option was added so that
quiet output could be requested.

However the change to run_kselftests.sh was left as-is.

This commit applies the same logic to the run_kselftests.sh code, ie.
the script now takes a "--summary" option which suppresses the output,
but shows all output by default.

Additionally instead of writing to /tmp/$TEST_NAME the output is
redirected to the directory where the generated test script is
located.

Fixes: fbcab13d2e25 ("selftests: silence test output by default")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 9 ++++++++-
 tools/testing/selftests/lib.mk   | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index eaf599dc2137..7442dfb73b7f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -116,8 +116,15 @@ ifdef INSTALL_PATH
 
 	@# Ask all targets to emit their test scripts
 	echo "#!/bin/sh" > $(ALL_SCRIPT)
-	echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+	echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
+	echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
 	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+	echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
+	echo "  OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
+	echo "  cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT)
+	echo "else" >> $(ALL_SCRIPT)
+	echo "  OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
+	echo "fi" >> $(ALL_SCRIPT)
 
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 5bef05d6ba39..7de482a0519d 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -77,7 +77,7 @@ endif
 define EMIT_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
 endef
 
-- 
cgit v1.2.3


From 39b72ccdb278f53735af1a378e67e6110e3210ad Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Tue, 16 Jan 2018 15:51:47 -0800
Subject: tools: bpftool: add -DPACKAGE when including bfd.h

bfd.h is requiring including of config.h except when PACKAGE or
PACKAGE_VERSION are defined.

  /* PR 14072: Ensure that config.h is included first.  */
  #if !defined PACKAGE && !defined PACKAGE_VERSION
  #error config.h must be included before this header
  #endif

This check has been introduced since May-2012. It doesn't show up in bfd.h
on some Linux distribution, probably because distributions have remove it
when building the package.

However, sometimes the user might just build libfd from source code then
link bpftool against it. For this case, bfd.h will be original that we need
to define PACKAGE or PACKAGE_VERSION.

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Makefile   | 2 +-
 tools/build/feature/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 2237bc43f71c..26901ec87361 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,7 @@ CC = gcc
 
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
-CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 17f2c73fff8b..bc715f6ac320 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -190,7 +190,7 @@ $(OUTPUT)test-libbfd.bin:
 	$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
 
 $(OUTPUT)test-disassembler-four-args.bin:
-	$(BUILD) -lbfd -lopcodes
+	$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
 
 $(OUTPUT)test-liberty.bin:
 	$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
-- 
cgit v1.2.3


From d77be68955475fc2321e73fe006240248f2f8fef Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 16 Jan 2018 15:51:48 -0800
Subject: libbpf: fix string comparison for guessing eBPF program type

libbpf is able to deduce the type of a program from the name of the ELF
section in which it is located. However, the comparison is made on the
first n characters, n being determined with sizeof() applied to the
reference string (e.g. "xdp"). When such section names are supposed to
receive a suffix separated with a slash (e.g. "kprobe/"), using sizeof()
takes the final NUL character of the reference string into account,
which implies that both strings must be equal. Instead, the desired
behaviour would consist in taking the length of the string, *without*
accounting for the ending NUL character, and to make sure the reference
string is a prefix to the ELF section name.

Subtract 1 to the total size of the string for obtaining the length for
the comparison.

Fixes: 583c90097f72 ("libbpf: add ability to guess program type based on section name")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e9c4b7cabcf2..30c776375118 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1803,7 +1803,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-#define BPF_PROG_SEC(string, type) { string, sizeof(string), type }
+#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
 static const struct {
 	const char *sec;
 	size_t len;
-- 
cgit v1.2.3


From 7d386c624980e476612490df52eaa86c8e066edc Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 17 Jan 2018 00:20:30 +0100
Subject: libbpf: install the header file libbpf.h

It seems like an oversight not to install the header file for libbpf,
given the libbpf.so + libbpf.a files are installed.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8ed43ae9db9b..54370654c708 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -192,7 +192,8 @@ install_lib: all_cmd
 
 install_headers:
 	$(call QUIET_INSTALL, headers) \
-		$(call do_install,bpf.h,$(prefix)/include/bpf,644)
+		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf.h,$(prefix)/include/bpf,644);
 
 install: install_lib
 
-- 
cgit v1.2.3


From 63c859101ec32cbc8fa5b708c7f17de63b15e56e Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 17 Jan 2018 00:20:35 +0100
Subject: libbpf: cleanup Makefile, remove unused elements

The plugin_dir_SQ variable is not used, remove it.
The function update_dir is also unused, remove it.
The variable $VERSION_FILES is empty, remove it.

These all originates from the introduction of the Makefile, and is likely a copy paste
from tools/lib/traceevent/Makefile.

Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 54370654c708..8e15e48cb8f8 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -93,7 +93,6 @@ export prefix libdir src obj
 # Shell quotes
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 LIB_FILE = libbpf.a libbpf.so
 
@@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
-all: fixdep $(VERSION_FILES) all_cmd
+all: fixdep all_cmd
 
 all_cmd: $(CMD_TARGETS)
 
@@ -169,16 +168,6 @@ $(OUTPUT)libbpf.so: $(BPF_IN)
 $(OUTPUT)libbpf.a: $(BPF_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
-define update_dir
-  (echo $1 > $@.tmp;				\
-   if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
-     rm -f $@.tmp;				\
-   else						\
-     echo '  UPDATE                 $@';	\
-     mv -f $@.tmp $@;				\
-   fi);
-endef
-
 define do_install
 	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
@@ -204,7 +193,7 @@ config-clean:
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
 clean:
-	$(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \
+	$(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
 		$(RM) LIBBPF-CFLAGS
 	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
-- 
cgit v1.2.3


From 7110d80d53f472956420cd05a6297f49b558b674 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 17 Jan 2018 00:20:40 +0100
Subject: libbpf: Makefile set specified permission mode

The third parameter to do_install was not used by $(INSTALL) command.
Fix this by only setting the -m option when the third parameter is supplied.

The use of a third parameter was introduced in commit  eb54e522a000 ("bpf:
install libbpf headers on 'make install'").

Without this change, the header files are install as executables files (755).

Fixes: eb54e522a000 ("bpf: install libbpf headers on 'make install'")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8e15e48cb8f8..83714ca1f22b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -172,7 +172,7 @@ define do_install
 	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
 	fi;						\
-	$(INSTALL) $1 '$(DESTDIR_SQ)$2'
+	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
 endef
 
 install_lib: all_cmd
-- 
cgit v1.2.3


From e65935969d0fac9df28d9c49bdbab5d8d8286a20 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Tue, 16 Jan 2018 16:05:21 -0800
Subject: tools: bpftool: improve architecture detection by using ifindex

The current architecture detection method in bpftool is designed for host
case.

For offload case, we can't use the architecture of "bpftool" itself.
Instead, we could call the existing "ifindex_to_name_ns" to get DEVNAME,
then read pci id from /sys/class/dev/DEVNAME/device/vendor, finally we map
vendor id to bfd arch name which will finally be used to select bfd backend
for the disassembler.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c     | 72 ++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/jit_disasm.c | 16 +++++++++-
 tools/bpf/bpftool/main.h       |  5 ++-
 tools/bpf/bpftool/prog.c       | 12 ++++++-
 4 files changed, 102 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6601c95a9258..0b482c0070e0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -34,6 +34,7 @@
 /* Author: Jakub Kicinski <kubakici@wp.pl> */
 
 #include <errno.h>
+#include <fcntl.h>
 #include <fts.h>
 #include <libgen.h>
 #include <mntent.h>
@@ -433,6 +434,77 @@ ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
 	return if_indextoname(ifindex, buf);
 }
 
+static int read_sysfs_hex_int(char *path)
+{
+	char vendor_id_buf[8];
+	int len;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		p_err("Can't open %s: %s", path, strerror(errno));
+		return -1;
+	}
+
+	len = read(fd, vendor_id_buf, sizeof(vendor_id_buf));
+	close(fd);
+	if (len < 0) {
+		p_err("Can't read %s: %s", path, strerror(errno));
+		return -1;
+	}
+	if (len >= (int)sizeof(vendor_id_buf)) {
+		p_err("Value in %s too long", path);
+		return -1;
+	}
+
+	vendor_id_buf[len] = 0;
+
+	return strtol(vendor_id_buf, NULL, 0);
+}
+
+static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
+{
+	char full_path[64];
+
+	snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s",
+		 devname, entry_name);
+
+	return read_sysfs_hex_int(full_path);
+}
+
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+{
+	char devname[IF_NAMESIZE];
+	int vendor_id;
+	int device_id;
+
+	if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
+		p_err("Can't get net device name for ifindex %d: %s", ifindex,
+		      strerror(errno));
+		return NULL;
+	}
+
+	vendor_id = read_sysfs_netdev_hex_int(devname, "vendor");
+	if (vendor_id < 0) {
+		p_err("Can't get device vendor id for %s", devname);
+		return NULL;
+	}
+
+	switch (vendor_id) {
+	case 0x19ee:
+		device_id = read_sysfs_netdev_hex_int(devname, "device");
+		if (device_id != 0x4000 &&
+		    device_id != 0x6000 &&
+		    device_id != 0x6003)
+			p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+		return "NFP-6xxx";
+	default:
+		p_err("Can't get bfd arch name for device vendor id 0x%04x",
+		      vendor_id);
+		return NULL;
+	}
+}
+
 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
 {
 	char name[IF_NAMESIZE];
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 57d32e8a1391..87439320ef70 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	return 0;
 }
 
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch)
 {
 	disassembler_ftype disassemble;
 	struct disassemble_info info;
@@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
 	else
 		init_disassemble_info(&info, stdout,
 				      (fprintf_ftype) fprintf);
+
+	/* Update architecture info for offload. */
+	if (arch) {
+		const bfd_arch_info_type *inf = bfd_scan_arch(arch);
+
+		if (inf) {
+			bfdf->arch_info = inf;
+		} else {
+			p_err("No libfd support for %s", arch);
+			return;
+		}
+	}
+
 	info.arch = bfd_get_arch(bfdf);
 	info.mach = bfd_get_mach(bfdf);
 	info.buffer = image;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 65b526fe6e7e..b8e9584d6246 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -121,7 +121,10 @@ int do_cgroup(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch);
 void print_hex_data_json(uint8_t *data, size_t len);
 
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+
 #endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 099e21cf1b5c..e8e2baaf93c2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -776,7 +776,17 @@ static int do_dump(int argc, char **argv)
 		}
 	} else {
 		if (member_len == &info.jited_prog_len) {
-			disasm_print_insn(buf, *member_len, opcodes);
+			const char *name = NULL;
+
+			if (info.ifindex) {
+				name = ifindex_to_bfd_name_ns(info.ifindex,
+							      info.netns_dev,
+							      info.netns_ino);
+				if (!name)
+					goto err_free;
+			}
+
+			disasm_print_insn(buf, *member_len, opcodes, name);
 		} else {
 			kernel_syms_load(&dd);
 			if (json_output)
-- 
cgit v1.2.3


From b223e3b4e03a13739ab462560b791a4c692fd86e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 18 Jan 2018 12:35:21 +0300
Subject: tools/bpf_jit_disasm: silence a static checker warning

There is a static checker warning that "proglen" has an upper bound but
no lower bound.  The allocation will just fail harmlessly so it's not a
big deal.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpf_jit_disasm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 30044bc4f389..58c2bab4ef6e 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -172,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 {
 	char *ptr, *pptr, *tmp;
 	off_t off = 0;
-	int ret, flen, proglen, pass, ulen = 0;
+	unsigned int proglen;
+	int ret, flen, pass, ulen = 0;
 	regmatch_t pmatch[1];
 	unsigned long base;
 	regex_t regex;
@@ -199,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 	}
 
 	ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
-	ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
+	ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx",
 		     &flen, &proglen, &pass, &base);
 	if (ret != 4) {
 		regfree(&regex);
@@ -239,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
 	}
 
 	assert(ulen == proglen);
-	printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
+	printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
 	       proglen, pass, flen);
 	printf("%lx + <x>:\n", base);
 
-- 
cgit v1.2.3


From e7b2823a582a5bca5ee47644f448e317178e8824 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 18 Jan 2018 17:49:08 +0100
Subject: bpf: Sync kernel ABI header with tooling header

Update tools/include/uapi/linux/bpf.h to bring it in sync with
include/uapi/linux/bpf.h.  The listed commits forgot to update it.

Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs")
Fixes: f19397a5c656 ("bpf: Add access to snd_cwnd and others in sock_ops")
Fixes: 06ef0ccb5a36 ("bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69f96af4a569..7c2259e8bc54 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -900,6 +900,9 @@ struct xdp_md {
 	__u32 data;
 	__u32 data_end;
 	__u32 data_meta;
+	/* Below access go through struct xdp_rxq_info */
+	__u32 ingress_ifindex; /* rxq->dev->ifindex */
+	__u32 rx_queue_index;  /* rxq->queue_index  */
 };
 
 enum sk_action {
@@ -956,6 +959,12 @@ struct bpf_sock_ops {
 	__u32 local_ip6[4];	/* Stored in network byte order */
 	__u32 remote_port;	/* Stored in network byte order */
 	__u32 local_port;	/* stored in host byte order */
+	__u32 is_fullsock;	/* Some TCP fields are only valid if
+				 * there is a full socket. If not, the
+				 * fields read as zero.
+				 */
+	__u32 snd_cwnd;
+	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 };
 
 /* List of known BPF sock_ops operators.
@@ -1010,7 +1019,8 @@ struct bpf_perf_event_value {
 #define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
 
 struct bpf_cgroup_dev_ctx {
-	__u32 access_type; /* (access << 16) | type */
+	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+	__u32 access_type;
 	__u32 major;
 	__u32 minor;
 };
-- 
cgit v1.2.3


From 111e6b45315c8d13658f23885b30eb9df3ea2914 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 17 Jan 2018 16:52:03 -0800
Subject: selftests/bpf: make test_verifier run most programs

to improve test coverage make test_verifier run all successfully loaded
programs on 64-byte zero initialized data.
For clsbpf and xdp it means empty 64-byte packet.
For lwt and socket_filters it's 64-byte packet where skb->data
points after L2.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 50 ++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 960179882a1c..6c22edb1f006 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -29,6 +29,7 @@
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
 #include <linux/bpf.h>
+#include <linux/if_ether.h>
 
 #include <bpf/bpf.h>
 
@@ -49,6 +50,8 @@
 #define MAX_INSNS	512
 #define MAX_FIXUPS	8
 #define MAX_NR_MAPS	4
+#define POINTER_VALUE	0xcafe4all
+#define TEST_DATA_LEN	64
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
@@ -62,6 +65,7 @@ struct bpf_test {
 	int fixup_map_in_map[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
+	uint32_t retval;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -95,6 +99,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = -3,
 	},
 	{
 		"unreachable",
@@ -210,6 +215,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"test8 ld_imm64",
@@ -517,6 +523,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"check valid spill/fill, skb mark",
@@ -803,6 +810,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = -ENOENT,
 	},
 	{
 		"jump test 4",
@@ -1823,6 +1831,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 0xfaceb00c,
 	},
 	{
 		"PTR_TO_STACK store/load - bad alignment on off",
@@ -1881,6 +1890,7 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr",
+		.retval = POINTER_VALUE,
 	},
 	{
 		"unpriv: add const to pointer",
@@ -2054,6 +2064,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_get_hash_recalc),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
@@ -2818,6 +2829,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test12 (and, good access)",
@@ -2842,6 +2854,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test13 (branches, good access)",
@@ -2872,6 +2885,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
@@ -2895,6 +2909,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test15 (spill with xadd)",
@@ -3181,6 +3196,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
 	},
 	{
 		"direct packet access: test28 (marking on <=, bad access)",
@@ -5798,6 +5814,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 0 /* csum_diff of 64-byte packet */,
 	},
 	{
 		"helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
@@ -6166,6 +6183,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 42 /* ultimate return value */,
 	},
 	{
 		"ld_ind: check calling conv, r1",
@@ -6237,6 +6255,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"check bpf_perf_event_data->sample_period byte load permitted",
@@ -7224,6 +7243,7 @@ static struct bpf_test tests[] = {
 		},
 		.fixup_map1 = { 3 },
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr as return value"
 	},
@@ -7244,6 +7264,7 @@ static struct bpf_test tests[] = {
 		},
 		.fixup_map1 = { 3 },
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
 		.errstr_unpriv = "R0 leaks addr as return value"
 	},
@@ -7685,6 +7706,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -8705,6 +8727,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: overlapping caller/callee",
@@ -8900,6 +8923,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
 	},
 	{
 		"calls: callee using args1",
@@ -8912,6 +8936,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: callee using wrong args2",
@@ -8942,6 +8967,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
 	},
 	{
 		"calls: callee changing pkt pointers",
@@ -8990,6 +9016,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN,
 	},
 	{
 		"calls: calls with stack arith",
@@ -9008,6 +9035,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: calls with misaligned stack access",
@@ -9041,6 +9069,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 43,
 	},
 	{
 		"calls: calls control flow, jump test 2",
@@ -9533,6 +9562,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: write into callee stack frame",
@@ -10144,6 +10174,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 2",
@@ -10209,6 +10240,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 4",
@@ -10242,6 +10274,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 5",
@@ -10650,10 +10683,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fd_prog, expected_ret, reject_from_alignment;
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
+	char data_in[TEST_DATA_LEN] = {};
 	int prog_type = test->prog_type;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
-	int i;
+	uint32_t retval;
+	int i, err;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
@@ -10696,6 +10731,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		}
 	}
 
+	if (fd_prog >= 0) {
+		err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
+					NULL, NULL, &retval, NULL);
+		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+			printf("Unexpected bpf_prog_test_run error\n");
+			goto fail_log;
+		}
+		if (!err && retval != test->retval &&
+		    test->retval != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, test->retval);
+			goto fail_log;
+		}
+	}
 	(*passes)++;
 	printf("OK%s\n", reject_from_alignment ?
 	       " (NOTE: reject due to unknown alignment)" : "");
-- 
cgit v1.2.3


From 52775b33bb5072fbc07b02c0cf4fe8da1f7ee7cd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 17 Jan 2018 19:13:28 -0800
Subject: bpf: offload: report device information about offloaded maps

Tell user space about device on which the map was created.
Unfortunate reality of user ABI makes sharing this code
with program offload difficult but the information is the
same.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7c2259e8bc54..af1f49ad8b88 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -938,6 +938,9 @@ struct bpf_map_info {
 	__u32 max_entries;
 	__u32 map_flags;
 	char  name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
-- 
cgit v1.2.3


From 064a07cba2919bcfbadf9edf5c26c740e69fa585 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 17 Jan 2018 19:13:29 -0800
Subject: tools: bpftool: report device information for offloaded maps

Print the information about device on which map is created.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 8d7db9d6b9cd..a152c1a5c94c 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -428,6 +428,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 
 	jsonw_name(json_wtr, "flags");
 	jsonw_printf(json_wtr, "%#x", info->map_flags);
+
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
 	jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
 	jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
@@ -469,7 +472,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	if (*info->name)
 		printf("name %s  ", info->name);
 
-	printf("flags 0x%x\n", info->map_flags);
+	printf("flags 0x%x", info->map_flags);
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+	printf("\n");
 	printf("\tkey %uB  value %uB  max_entries %u",
 	       info->key_size, info->value_size, info->max_entries);
 
-- 
cgit v1.2.3


From 7fedbb7c5a7c4bda418bc1056c06c81db36e4299 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 17 Jan 2018 19:13:31 -0800
Subject: selftest/bpf: extend the offload test with map checks

Check map device information is reported correctly, and perform
basic map operations.  Check device destruction gets rid of the
maps and map allocation failure path by telling netdevsim to
reject map offload via DebugFS.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile          |   3 +-
 tools/testing/selftests/bpf/sample_map_ret0.c |  34 +++++
 tools/testing/selftests/bpf/test_offload.py   | 206 +++++++++++++++++++++++---
 3 files changed, 218 insertions(+), 25 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/sample_map_ret0.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a8aa7e251c8e..3a44b655d852 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
+	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+	sample_map_ret0.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000000..0756303676ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+	__u64 key64 = 0;
+	__u32 key = 0;
+	long *value;
+
+	value = bpf_map_lookup_elem(&htab, &key);
+	if (!value)
+		return 1;
+	value = bpf_map_lookup_elem(&array, &key64);
+	if (!value)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e3c750f17cb8..833b9c1ec450 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -20,6 +20,7 @@ import os
 import pprint
 import random
 import string
+import struct
 import subprocess
 import time
 
@@ -156,6 +157,14 @@ def bpftool_prog_list(expected=None, ns=""):
                  (len(progs), expected))
     return progs
 
+def bpftool_map_list(expected=None, ns=""):
+    _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    if expected is not None:
+        if len(maps) != expected:
+            fail(True, "%d BPF maps loaded, expected %d" %
+                 (len(maps), expected))
+    return maps
+
 def bpftool_prog_list_wait(expected=0, n_retry=20):
     for i in range(n_retry):
         nprogs = len(bpftool_prog_list())
@@ -164,6 +173,14 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
+def bpftool_map_list_wait(expected=0, n_retry=20):
+    for i in range(n_retry):
+        nmaps = len(bpftool_map_list())
+        if nmaps == expected:
+            return
+        time.sleep(0.05)
+    raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
 def ip(args, force=False, JSON=True, ns="", fail=True):
     if force:
         args = "-force " + args
@@ -193,6 +210,26 @@ def mknetns(n_retry=10):
             return name
     return None
 
+def int2str(fmt, val):
+    ret = []
+    for b in struct.pack(fmt, val):
+        ret.append(int(b))
+    return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+    inttab = []
+    for i in strtab:
+        inttab.append(int(i, 16))
+    ba = bytearray(inttab)
+    if len(strtab) == 4:
+        fmt = "I"
+    elif len(strtab) == 8:
+        fmt = "Q"
+    else:
+        raise Exception("String array of len %d can't be unpacked to an int" %
+                        (len(strtab)))
+    return struct.unpack(fmt, ba)[0]
+
 class DebugfsDir:
     """
     Class for accessing DebugFS directories as a dictionary.
@@ -311,13 +348,13 @@ class NetdevSim:
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
 
-    def set_xdp(self, bpf, mode, force=False, fail=True):
+    def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
         return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
-                  force=force, fail=fail)
+                  force=force, JSON=JSON, fail=fail)
 
-    def unset_xdp(self, mode, force=False, fail=True):
+    def unset_xdp(self, mode, force=False, JSON=True, fail=True):
         return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
-                  force=force, fail=fail)
+                  force=force, JSON=JSON, fail=fail)
 
     def ip_link_show(self, xdp):
         _, link = ip("link show dev %s" % (self['ifname']))
@@ -390,12 +427,16 @@ class NetdevSim:
 
 ################################################################################
 def clean_up():
+    global files, netns, devs
+
     for dev in devs:
         dev.remove()
     for f in files:
         cmd("rm -f %s" % (f))
     for ns in netns:
         cmd("ip netns delete %s" % (ns))
+    files = []
+    netns = []
 
 def pin_prog(file_name, idx=0):
     progs = bpftool_prog_list(expected=(idx + 1))
@@ -405,16 +446,31 @@ def pin_prog(file_name, idx=0):
 
     return file_name, bpf_pinned(file_name)
 
-def check_dev_info(other_ns, ns, pin_file=None, removed=False):
-    if removed:
-        bpftool_prog_list(expected=0)
-        ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
-        fail(ret == 0, "Showing prog with removed device did not fail")
-        fail(err["error"].find("No such device") == -1,
-             "Showing prog with removed device expected ENODEV, error is %s" %
-             (err["error"]))
-        return
-    progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+def pin_map(file_name, idx=0, expected=1):
+    maps = bpftool_map_list(expected=expected)
+    m = maps[idx]
+    bpftool("map pin id %d %s" % (m["id"], file_name))
+    files.append(file_name)
+
+    return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+    bpftool_prog_list(expected=0)
+    ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+    fail(ret == 0, "Showing prog with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,
+         "Showing prog with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+    bpftool_map_list(expected=0)
+    ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+    fail(ret == 0, "Showing map with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,
+         "Showing map with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+    progs = bpftool_prog_list(expected=1, ns=ns)
     prog = progs[0]
 
     fail("dev" not in prog.keys(), "Device parameters not reported")
@@ -423,16 +479,17 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False):
     fail("ns_dev" not in dev.keys(), "Device parameters not reported")
     fail("ns_inode" not in dev.keys(), "Device parameters not reported")
 
-    if not removed and not other_ns:
+    if not other_ns:
         fail("ifname" not in dev.keys(), "Ifname not reported")
         fail(dev["ifname"] != sim["ifname"],
              "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
     else:
         fail("ifname" in dev.keys(), "Ifname is reported for other ns")
-        if removed:
-            fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
-            fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
-            fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
+    maps = bpftool_map_list(expected=2, ns=ns)
+    for m in maps:
+        fail("dev" not in m.keys(), "Device parameters not reported")
+        fail(dev != m["dev"], "Map's device different than program's")
 
 # Parse command line
 parser = argparse.ArgumentParser()
@@ -464,7 +521,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1:
     cmd("mount -t debugfs none /sys/kernel/debug")
 
 # Check samples are compiled
-samples = ["sample_ret0.o"]
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
 for s in samples:
     ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
     skip(ret != 0, "sample %s/%s not found, please compile it" %
@@ -739,8 +796,9 @@ try:
     bpftool_prog_list_wait(expected=0)
 
     sim = NetdevSim()
-    sim.set_ethtool_tc_offloads(True)
-    sim.set_xdp(obj, "offload")
+    map_obj = bpf_obj("sample_map_ret0.o")
+    start_test("Test loading program with maps...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
 
     start_test("Test bpftool bound info reporting (own ns)...")
     check_dev_info(False, "")
@@ -757,11 +815,111 @@ try:
     sim.set_ns("")
     check_dev_info(False, "")
 
-    pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+    prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+    map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
     sim.remove()
 
     start_test("Test bpftool bound info reporting (removed dev)...")
-    check_dev_info(True, "", pin_file=pin_file, removed=True)
+    check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+
+    start_test("Test map update (no flags)...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    maps = bpftool_map_list(expected=2)
+    array = maps[0] if maps[0]["type"] == "array" else maps[1]
+    htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, _ = bpftool("map update id %d key %s value %s" %
+                         (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                         fail=False)
+        fail(ret == 0, "added too many entries")
+
+    start_test("Test map update (exists)...")
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s exist" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, err = bpftool("map update id %d key %s value %s exist" %
+                           (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                           fail=False)
+        fail(ret == 0, "updated non-existing key")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map update (noexist)...")
+    for m in maps:
+        for i in range(2):
+            ret, err = bpftool("map update id %d key %s value %s noexist" %
+                               (m["id"], int2str("I", i), int2str("Q", i * 3)),
+                               fail=False)
+        fail(ret == 0, "updated existing key")
+        fail(err["error"].find("File exists") == -1,
+             "expected EEXIST, error is '%s'" % (err["error"]))
+
+    start_test("Test map dump...")
+    for m in maps:
+        _, entries = bpftool("map dump id %d" % (m["id"]))
+        for i in range(2):
+            key = str2int(entries[i]["key"])
+            fail(key != i, "expected key %d, got %d" % (key, i))
+            val = str2int(entries[i]["value"])
+            fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+    start_test("Test map getnext...")
+    for m in maps:
+        _, entry = bpftool("map getnext id %d" % (m["id"]))
+        key = str2int(entry["next_key"])
+        fail(key != 0, "next key %d, expected %d" % (key, 0))
+        _, entry = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 0)))
+        key = str2int(entry["next_key"])
+        fail(key != 1, "next key %d, expected %d" % (key, 1))
+        ret, err = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 1)), fail=False)
+        fail(ret == 0, "got next key past the end of map")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map delete (htab)...")
+    for i in range(2):
+        bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+    start_test("Test map delete (array)...")
+    for i in range(2):
+        ret, err = bpftool("map delete id %d key %s" %
+                           (htab["id"], int2str("I", i)), fail=False)
+        fail(ret == 0, "removed entry from an array")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map remove...")
+    sim.unset_xdp("offload")
+    bpftool_map_list_wait(expected=0)
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    sim.remove()
+    bpftool_map_list_wait(expected=0)
+
+    start_test("Test map creation fail path...")
+    sim = NetdevSim()
+    sim.dfs["bpf_map_accept"] = "N"
+    ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+    fail(ret == 0,
+         "netdevsim didn't refuse to create a map with offload disabled")
 
     print("%s: OK" % (os.path.basename(__file__)))
 
-- 
cgit v1.2.3


From a55aaf6db0587e1a6e79dce8ada0e237d6068afb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 19 Jan 2018 14:17:45 +0000
Subject: bpftool: recognize BPF_MAP_TYPE_CPUMAP maps

Add BPF_MAP_TYPE_CPUMAP map type to the list
of map type recognized by bpftool and define
corresponding text representation.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Quentin Monnet <quentin.monnet@netronome.com>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a152c1a5c94c..f95fa67bb498 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -66,6 +66,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_HASH_OF_MAPS]	= "hash_of_maps",
 	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
 	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
+	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
 };
 
 static unsigned int get_possible_cpus(void)
-- 
cgit v1.2.3


From b7bcc0bbb8acb640258bb451f1f9391737da48b1 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 18 Jan 2018 17:36:24 -0700
Subject: selftests: bpf: update .gitignore with missing generated files

Update .gitignore with missing generated files.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/.gitignore | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 541d9d7fad5a..1e09d77f1948 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -3,3 +3,10 @@ test_maps
 test_lru_map
 test_lpm_map
 test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_verifier_log
+feature
-- 
cgit v1.2.3


From 8c417dc15f9522672795981dcb63d9099ca6bd8c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 18 Jan 2018 15:08:51 -0800
Subject: tools/bpf: add a testcase for MAP_GET_NEXT_KEY command of LPM_TRIE
 map

A test case is added in tools/testing/selftests/bpf/test_lpm_map.c
for MAP_GET_NEXT_KEY command. A four node trie, which
is described in kernel/bpf/lpm_trie.c, is built and the
MAP_GET_NEXT_KEY results are checked.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lpm_map.c | 122 +++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index f61480641b6e..081510853c6d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -521,6 +521,126 @@ static void test_lpm_delete(void)
 	close(map_fd);
 }
 
+static void test_lpm_get_next_key(void)
+{
+	struct bpf_lpm_trie_key *key_p, *next_key_p;
+	size_t key_size;
+	__u32 value = 0;
+	int map_fd;
+
+	key_size = sizeof(*key_p) + sizeof(__u32);
+	key_p = alloca(key_size);
+	next_key_p = alloca(key_size);
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+				100, BPF_F_NO_PREALLOC);
+	assert(map_fd >= 0);
+
+	/* empty tree. get_next_key should return ENOENT */
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* get and verify the first key, get the second one should fail. */
+	key_p->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* no exact matching key should get the first one in post order. */
+	key_p->prefixlen = 8;
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	/* add one more element (total two) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add one more element (total three) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add one more element (total four) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* no exact matching key should return the first one in post order */
+	key_p->prefixlen = 22;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+	close(map_fd);
+}
+
 int main(void)
 {
 	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
@@ -545,6 +665,8 @@ int main(void)
 
 	test_lpm_delete();
 
+	test_lpm_get_next_key();
+
 	printf("test_lpm: OK\n");
 	return 0;
 }
-- 
cgit v1.2.3


From 87c1793b1b7f34915e9e64cdb503efb281c769a7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 20 Jan 2018 01:24:32 +0100
Subject: bpf: add couple of test cases for div/mod by zero

Add couple of missing test cases for eBPF div/mod by zero to the
new test_verifier prog runtime feature. Also one for an empty prog
and only exit.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 87 +++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6c22edb1f006..efca10de64e9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -101,6 +101,93 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.retval = -3,
 	},
+	{
+		"DIV32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"empty prog",
+		.insns = {
+		},
+		.errstr = "last insn is not an exit or jmp",
+		.result = REJECT,
+	},
+	{
+		"only exit insn",
+		.insns = {
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+	},
 	{
 		"unreachable",
 		.insns = {
-- 
cgit v1.2.3


From be68fb64f763b7b6ddb202e0a931f41ae62f71b0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 28 Nov 2017 14:06:39 +0530
Subject: selftest/powerpc: Add additional option to mmap_bench test

This patch adds --pgfault and --iterations options to mmap_bench test. With
--pgfault we touch every page mapped. This helps in measuring impact in the
page fault path with a patch series.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../selftests/powerpc/benchmarks/mmap_bench.c      | 53 ++++++++++++++++++++--
 1 file changed, 50 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
index 8d084a2d6e74..7a0a462a2272 100644
--- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -7,17 +7,34 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <time.h>
+#include <getopt.h>
 
 #include "utils.h"
 
 #define ITERATIONS 5000000
 
-#define MEMSIZE (128 * 1024 * 1024)
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+	{ "pgfault", no_argument, &pg_fault, 1 },
+	{ "iterations", required_argument, 0, 'i' },
+	{ 0, },
+};
+
+static void usage(void)
+{
+	printf("mmap_bench <--pgfault> <--iterations count>\n");
+}
 
 int test_mmap(void)
 {
 	struct timespec ts_start, ts_end;
-	unsigned long i = ITERATIONS;
+	unsigned long i = iterations;
 
 	clock_gettime(CLOCK_MONOTONIC, &ts_start);
 
@@ -25,6 +42,11 @@ int test_mmap(void)
 		char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
 			       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 		FAIL_IF(c == MAP_FAILED);
+		if (pg_fault) {
+			int count;
+			for (count = 0; count < CHUNK_COUNT; count++)
+				c[count << 16] = 'c';
+		}
 		munmap(c, MEMSIZE);
 	}
 
@@ -35,7 +57,32 @@ int test_mmap(void)
 	return 0;
 }
 
-int main(void)
+int main(int argc, char *argv[])
 {
+	signed char c;
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			break;
+		default:
+			usage();
+			exit(1);
+		}
+	}
 	return test_harness(test_mmap, "mmap_bench");
 }
-- 
cgit v1.2.3


From 8d1915873d492b8e1f03bbcab527db62a8d49542 Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Mon, 16 Oct 2017 16:04:02 +1100
Subject: selftests/powerpc: Add alignment handler selftest

Add a selftest to exercise the powerpc alignment fault handler.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
[mpe: Add 32-bit support to the signal handler]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/alignment/Makefile |   3 +-
 .../powerpc/alignment/alignment_handler.c          | 491 +++++++++++++++++++++
 2 files changed, 493 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/alignment/alignment_handler.c

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 16b22004e75f..083a48a008b4 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -1,4 +1,5 @@
-TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
+TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \
+	paste_last_unaligned alignment_handler
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644
index 000000000000..39fd362415cf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -0,0 +1,491 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ *   - Any FIXMEs below
+ *   - Test VSX regs < 32 and > 32
+ *   - Test all loads and stores
+ *   - Check update forms do update register
+ *   - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+	struct ucontext *ucp = ctx;
+
+	if (!testing) {
+		signal(sig, SIG_DFL);
+		kill(0, sig);
+	}
+	gotsig = sig;
+#ifdef __powerpc64__
+	ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+	ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n)  " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n)  " " #reg " ,0(%"#n") ;"
+
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg)		\
+	void test_##name(char *s, char *d)			\
+	{							\
+		asm volatile(					\
+			#ld_op form(ld_reg, 0)			\
+			#st_op form(st_reg, 1)			\
+			:: "r"(s), "r"(d), "r"(0)		\
+			: "memory", "vs0", "vs32", "r31");	\
+	}							\
+	rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op)  TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+	char *c = dst;
+	int i;
+
+	c += offset;
+
+	for (i = 0 ; i < width ; i++)
+		c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+		void (*test_func)(char *, char *))
+{
+	char *s, *d;
+
+	s = src;
+	s += offset;
+	d = dst;
+	d += offset;
+
+	assert(size == 16);
+	gotsig = 0;
+	testing = 1;
+
+	test_func(s, d); /* run the actual test */
+
+	testing = 0;
+	if (gotsig) {
+		if (debug)
+			printf("  Got signal %i\n", gotsig);
+		return 1;
+	}
+	return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+	int i;
+
+	printf("  %s: unexpected result:\n", test_name);
+	printf("    mem:");
+	for (i = 0; i < n; i++)
+		printf(" %02x", s1[i]);
+	printf("\n");
+	printf("    ci: ");
+	for (i = 0; i < n; i++)
+		printf(" %02x", s2[i]);
+	printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+	char *s1c, *s2c;
+
+	s1c = s1;
+	s1c += offset;
+	s2c = s2;
+	s2c += offset;
+
+	if (memcmp(s1c, s2c, n)) {
+		if (debug) {
+			printf("\n  Compare failed. Offset:%i length:%i\n",
+			       offset, n);
+			dumpdata(s1c, s2c, n, test_name);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions. One cachable
+ * memory and the other doesn't.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+	int offset, width, fd, rc = 0, r;
+	void *mem0, *mem1, *ci0, *ci1;
+
+	printf("\tDoing %s:\t", test_name);
+
+	fd = open("/dev/fb0", O_RDWR);
+	if (fd < 0) {
+		printf("\n");
+		perror("Can't open /dev/fb0");
+		SKIP_IF(1);
+	}
+
+	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+		   fd, 0x0);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+		   fd, bufsize);
+	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+		printf("\n");
+		perror("mmap failed");
+		SKIP_IF(1);
+	}
+
+	rc = posix_memalign(&mem0, bufsize, bufsize);
+	if (rc) {
+		printf("\n");
+		return rc;
+	}
+
+	rc = posix_memalign(&mem1, bufsize, bufsize);
+	if (rc) {
+		printf("\n");
+		free(mem0);
+		return rc;
+	}
+
+	/* offset = 0 no alignment fault, so skip */
+	for (offset = 1; offset < 16; offset++) {
+		width = 16; /* vsx == 16 bytes */
+		r = 0;
+
+		/* load pattern into memory byte by byte */
+		preload_data(ci0, offset, width);
+		preload_data(mem0, offset, width); // FIXME: remove??
+		memcpy(ci0, mem0, bufsize);
+		memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+		/* sanity check */
+		test_memcmp(mem0, ci0, width, offset, test_name);
+
+		r |= test_memcpy(ci1,  ci0,  width, offset, test_func);
+		r |= test_memcpy(mem1, mem0, width, offset, test_func);
+		if (r && !debug) {
+			printf("FAILED: Got signal");
+			break;
+		}
+
+		r |= test_memcmp(mem1, ci1, width, offset, test_name);
+		rc |= r;
+		if (r && !debug) {
+			printf("FAILED: Wrong Data");
+			break;
+		}
+	}
+	if (!r)
+		printf("PASSED");
+	printf("\n");
+
+	munmap(ci0, bufsize);
+	munmap(ci1, bufsize);
+	free(mem0);
+	free(mem1);
+
+	return rc;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+	int rc = 0;
+
+	printf("VSX: 2.06B\n");
+	LOAD_VSX_XFORM_TEST(lxvd2x);
+	LOAD_VSX_XFORM_TEST(lxvw4x);
+	LOAD_VSX_XFORM_TEST(lxsdx);
+	LOAD_VSX_XFORM_TEST(lxvdsx);
+	STORE_VSX_XFORM_TEST(stxvd2x);
+	STORE_VSX_XFORM_TEST(stxvw4x);
+	STORE_VSX_XFORM_TEST(stxsdx);
+	return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+	int rc = 0;
+
+	printf("VSX: 2.07B\n");
+	LOAD_VSX_XFORM_TEST(lxsspx);
+	LOAD_VSX_XFORM_TEST(lxsiwax);
+	LOAD_VSX_XFORM_TEST(lxsiwzx);
+	STORE_VSX_XFORM_TEST(stxsspx);
+	STORE_VSX_XFORM_TEST(stxsiwx);
+	return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+	printf("VSX: 3.00B\n");
+	LOAD_VMX_DFORM_TEST(lxsd);
+	LOAD_VSX_XFORM_TEST(lxsibzx);
+	LOAD_VSX_XFORM_TEST(lxsihzx);
+	LOAD_VMX_DFORM_TEST(lxssp);
+	LOAD_VSX_DFORM_TEST(lxv);
+	LOAD_VSX_XFORM_TEST(lxvb16x);
+	LOAD_VSX_XFORM_TEST(lxvh8x);
+	LOAD_VSX_XFORM_TEST(lxvx);
+	LOAD_VSX_XFORM_TEST(lxvwsx);
+	LOAD_VSX_XFORM_TEST(lxvl);
+	LOAD_VSX_XFORM_TEST(lxvll);
+	STORE_VMX_DFORM_TEST(stxsd);
+	STORE_VSX_XFORM_TEST(stxsibx);
+	STORE_VSX_XFORM_TEST(stxsihx);
+	STORE_VMX_DFORM_TEST(stxssp);
+	STORE_VSX_DFORM_TEST(stxv);
+	STORE_VSX_XFORM_TEST(stxvb16x);
+	STORE_VSX_XFORM_TEST(stxvh8x);
+	STORE_VSX_XFORM_TEST(stxvx);
+	STORE_VSX_XFORM_TEST(stxvl);
+	STORE_VSX_XFORM_TEST(stxvll);
+	return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+	int rc = 0;
+
+	printf("Integer\n");
+	LOAD_DFORM_TEST(lbz);
+	LOAD_DFORM_TEST(lbzu);
+	LOAD_XFORM_TEST(lbzx);
+	LOAD_XFORM_TEST(lbzux);
+	LOAD_DFORM_TEST(lhz);
+	LOAD_DFORM_TEST(lhzu);
+	LOAD_XFORM_TEST(lhzx);
+	LOAD_XFORM_TEST(lhzux);
+	LOAD_DFORM_TEST(lha);
+	LOAD_DFORM_TEST(lhau);
+	LOAD_XFORM_TEST(lhax);
+	LOAD_XFORM_TEST(lhaux);
+	LOAD_XFORM_TEST(lhbrx);
+	LOAD_DFORM_TEST(lwz);
+	LOAD_DFORM_TEST(lwzu);
+	LOAD_XFORM_TEST(lwzx);
+	LOAD_XFORM_TEST(lwzux);
+	LOAD_DFORM_TEST(lwa);
+	LOAD_XFORM_TEST(lwax);
+	LOAD_XFORM_TEST(lwaux);
+	LOAD_XFORM_TEST(lwbrx);
+	LOAD_DFORM_TEST(ld);
+	LOAD_DFORM_TEST(ldu);
+	LOAD_XFORM_TEST(ldx);
+	LOAD_XFORM_TEST(ldux);
+	LOAD_XFORM_TEST(ldbrx);
+	LOAD_DFORM_TEST(lmw);
+	STORE_DFORM_TEST(stb);
+	STORE_XFORM_TEST(stbx);
+	STORE_DFORM_TEST(stbu);
+	STORE_XFORM_TEST(stbux);
+	STORE_DFORM_TEST(sth);
+	STORE_XFORM_TEST(sthx);
+	STORE_DFORM_TEST(sthu);
+	STORE_XFORM_TEST(sthux);
+	STORE_XFORM_TEST(sthbrx);
+	STORE_DFORM_TEST(stw);
+	STORE_XFORM_TEST(stwx);
+	STORE_DFORM_TEST(stwu);
+	STORE_XFORM_TEST(stwux);
+	STORE_XFORM_TEST(stwbrx);
+	STORE_DFORM_TEST(std);
+	STORE_XFORM_TEST(stdx);
+	STORE_DFORM_TEST(stdu);
+	STORE_XFORM_TEST(stdux);
+	STORE_XFORM_TEST(stdbrx);
+	STORE_DFORM_TEST(stmw);
+	return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+	int rc = 0;
+
+	printf("VMX\n");
+	LOAD_VMX_XFORM_TEST(lvx);
+
+	/*
+	 * FIXME: These loads only load part of the register, so our
+	 * testing method doesn't work. Also they don't take alignment
+	 * faults, so it's kinda pointless anyway
+	 *
+	 LOAD_VMX_XFORM_TEST(lvebx)
+	 LOAD_VMX_XFORM_TEST(lvehx)
+	 LOAD_VMX_XFORM_TEST(lvewx)
+	 LOAD_VMX_XFORM_TEST(lvxl)
+	*/
+	STORE_VMX_XFORM_TEST(stvx);
+	STORE_VMX_XFORM_TEST(stvebx);
+	STORE_VMX_XFORM_TEST(stvehx);
+	STORE_VMX_XFORM_TEST(stvewx);
+	STORE_VMX_XFORM_TEST(stvxl);
+	return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+	int rc = 0;
+
+	printf("Floating point\n");
+	LOAD_FLOAT_DFORM_TEST(lfd);
+	LOAD_FLOAT_XFORM_TEST(lfdx);
+	LOAD_FLOAT_DFORM_TEST(lfdp);
+	LOAD_FLOAT_XFORM_TEST(lfdpx);
+	LOAD_FLOAT_DFORM_TEST(lfdu);
+	LOAD_FLOAT_XFORM_TEST(lfdux);
+	LOAD_FLOAT_DFORM_TEST(lfs);
+	LOAD_FLOAT_XFORM_TEST(lfsx);
+	LOAD_FLOAT_DFORM_TEST(lfsu);
+	LOAD_FLOAT_XFORM_TEST(lfsux);
+	LOAD_FLOAT_XFORM_TEST(lfiwzx);
+	LOAD_FLOAT_XFORM_TEST(lfiwax);
+	STORE_FLOAT_DFORM_TEST(stfd);
+	STORE_FLOAT_XFORM_TEST(stfdx);
+	STORE_FLOAT_DFORM_TEST(stfdp);
+	STORE_FLOAT_XFORM_TEST(stfdpx);
+	STORE_FLOAT_DFORM_TEST(stfdu);
+	STORE_FLOAT_XFORM_TEST(stfdux);
+	STORE_FLOAT_DFORM_TEST(stfs);
+	STORE_FLOAT_XFORM_TEST(stfsx);
+	STORE_FLOAT_DFORM_TEST(stfsu);
+	STORE_FLOAT_XFORM_TEST(stfsux);
+	STORE_FLOAT_XFORM_TEST(stfiwx);
+
+	return rc;
+}
+
+void usage(char *prog)
+{
+	printf("Usage: %s [options]\n", prog);
+	printf("  -d	Enable debug error output\n");
+	printf("\n");
+	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+	printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+	struct sigaction sa;
+	int rc = 0;
+	int option = 0;
+
+	while ((option = getopt(argc, argv, "d")) != -1) {
+		switch (option) {
+		case 'd':
+			debug++;
+			break;
+		default:
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	bufsize = getpagesize();
+
+	sa.sa_sigaction = sighandler;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL) == -1
+	    || sigaction(SIGBUS, &sa, NULL) == -1
+	    || sigaction(SIGILL, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(1);
+	}
+
+	rc |= test_harness(test_alignment_handler_vsx_206,
+			   "test_alignment_handler_vsx_206");
+	rc |= test_harness(test_alignment_handler_vsx_207,
+			   "test_alignment_handler_vsx_207");
+	rc |= test_harness(test_alignment_handler_vsx_300,
+			   "test_alignment_handler_vsx_300");
+	rc |= test_harness(test_alignment_handler_integer,
+			   "test_alignment_handler_integer");
+	rc |= test_harness(test_alignment_handler_vmx,
+			   "test_alignment_handler_vmx");
+	rc |= test_harness(test_alignment_handler_fp,
+			   "test_alignment_handler_fp");
+	return rc;
+}
-- 
cgit v1.2.3


From a08082f8e4e1c292df174a9ed303cfbff2fbe2cb Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Sun, 31 Dec 2017 18:20:46 -0500
Subject: powerpc/selftests: Check endianness on trap in TM

Add a selftest to check if endianness is flipped inadvertently to BE
(MSR.LE set to zero) on BE and LE machines when a trap is caught in
transactional mode and load_fp and load_vec are zero, i.e. when MSR.FP
and MSR.VEC are zeroed (disabled).

Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/tm/.gitignore |   1 +
 tools/testing/selftests/powerpc/tm/Makefile   |   3 +-
 tools/testing/selftests/powerpc/tm/tm-trap.c  | 329 ++++++++++++++++++++++++++
 3 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-trap.c

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 241a4a4ee0e4..bb90d4b79524 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -13,3 +13,4 @@ tm-signal-context-chk-vmx
 tm-signal-context-chk-vsx
 tm-vmx-unavail
 tm-unavailable
+tm-trap
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 8ed6f8c57230..a23453943ad2 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 	tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
-	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \
+	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
 	$(SIGNAL_CONTEXT_CHK_TESTS)
 
 include ../../lib.mk
@@ -18,6 +18,7 @@ $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
 $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644
index 000000000000..5d92c23ee6cb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * thread as an illegal instruction in following sequence:
+ *	tbegin.
+ *	beq 1f
+ *	trap
+ *	tend.
+ * 1:	nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check it on BE machines because MSR.LE bit is set
+ * to zero which determines a BE endianness that is the native
+ * endianness on BE machines, so nothing notably critical happens,
+ * i.e. no illegal instruction is observed immediately after returning
+ * from the signal handler (as it happens on LE machines). Thus to test
+ * it on BE machines LE endianness is forced after a first trap and then
+ * the endianness is verified on subsequent traps to determine if the
+ * endianness "flipped back" to the native endianness (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+	error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define MSR_LE 1UL
+#define LE     1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+	ucontext_t *ucp = uc;
+	uint64_t thread_endianness;
+
+	/* Get thread endianness: extract bit LE from MSR */
+	thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+	/***
+	 * Little-Endian Machine
+	 */
+
+	if (le) {
+		/* First trap event */
+		if (trap_event == 0) {
+			/* Do nothing. Since it is returning from this trap
+			 * event that endianness is flipped by the bug, so just
+			 * let the process return from the signal handler and
+			 * check on the second trap event if endianness is
+			 * flipped or not.
+			 */
+		}
+		/* Second trap event */
+		else if (trap_event == 1) {
+			/*
+			 * Since trap was caught in TM on first trap event, if
+			 * endianness was still LE (not flipped inadvertently)
+			 * after returning from the signal handler instruction
+			 * (1) is executed (basically a 'nop'), as it's located
+			 * at address of tbegin. +4 (rollback addr). As (1) on
+			 * LE endianness does in effect nothing, instruction (2)
+			 * is then executed again as 'trap', generating a second
+			 * trap event (note that in that case 'trap' is caught
+			 * not in transacional mode). On te other hand, if after
+			 * the return from the signal handler the endianness in-
+			 * advertently flipped, instruction (1) is tread as a
+			 * branch instruction, i.e. b .+8, hence instruction (3)
+			 * and (4) are executed (tbegin.; trap;) and we get sim-
+			 * ilaly on the trap signal handler, but now in TM mode.
+			 * Either way, it's now possible to check the MSR LE bit
+			 * once in the trap handler to verify if endianness was
+			 * flipped or not after the return from the second trap
+			 * event. If endianness is flipped, the bug is present.
+			 * Finally, getting a trap in TM mode or not is just
+			 * worth noting because it affects the math to determine
+			 * the offset added to the NIP on return: the NIP for a
+			 * trap caught in TM is the rollback address, i.e. the
+			 * next instruction after 'tbegin.', whilst the NIP for
+			 * a trap caught in non-transactional mode is the very
+			 * same address of the 'trap' instruction that generated
+			 * the trap event.
+			 */
+
+			if (thread_endianness == LE) {
+				/* Go to 'success', i.e. instruction (6) */
+				ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+			} else {
+				/*
+				 * Thread endianness is BE, so it flipped
+				 * inadvertently. Thus we flip back to LE and
+				 * set NIP to go to 'failure', instruction (5).
+				 */
+				ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+				ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+			}
+		}
+	}
+
+	/***
+	 * Big-Endian Machine
+	 */
+
+	else {
+		/* First trap event */
+		if (trap_event == 0) {
+			/*
+			 * Force thread endianness to be LE. Instructions (1),
+			 * (3), and (4) will be executed, generating a second
+			 * trap in TM mode.
+			 */
+			ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+		}
+		/* Second trap event */
+		else if (trap_event == 1) {
+			/*
+			 * Do nothing. If bug is present on return from this
+			 * second trap event endianness will flip back "automat-
+			 * ically" to BE, otherwise thread endianness will
+			 * continue to be LE, just as it was set above.
+			 */
+		}
+		/* A third trap event */
+		else {
+			/*
+			 * Once here it means that after returning from the sec-
+			 * ond trap event instruction (4) (trap) was executed
+			 * as LE, generating a third trap event. In that case
+			 * endianness is still LE as set on return from the
+			 * first trap event, hence no bug. Otherwise, bug
+			 * flipped back to BE on return from the second trap
+			 * event and instruction (4) was executed as 'tdi' (so
+			 * basically a 'nop') and branch to 'failure' in
+			 * instruction (5) was taken to indicate failure and we
+			 * never get here.
+			 */
+
+			/*
+			 * Flip back to BE and go to instruction (6), i.e. go to
+			 * 'success'.
+			 */
+			ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
+			ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+		}
+	}
+
+	trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+	/* Got a USR1 signal from ping(), so just tell pong() to exit */
+	exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+	uint64_t i;
+
+	trap_event = 0;
+
+	/*
+	 * Wait an amount of context switches so load_fp and load_vec overflows
+	 * and MSR_[FP|VEC|V] is 0.
+	 */
+	for (i = 0; i < 1024*1024*512; i++)
+		;
+
+	asm goto(
+		/*
+		 * [NA] means "Native Endianness", i.e. it tells how a
+		 * instruction is executed on machine's native endianness (in
+		 * other words, native endianness matches kernel endianness).
+		 * [OP] means "Opposite Endianness", i.e. on a BE machine, it
+		 * tells how a instruction is executed as a LE instruction; con-
+		 * versely, on a LE machine, it tells how a instruction is
+		 * executed as a BE instruction. When [NA] is omitted, it means
+		 * that the native interpretation of a given instruction is not
+		 * relevant for the test. Likewise when [OP] is omitted.
+		 */
+
+		" tbegin.        ;" /* (0) tbegin. [NA]                    */
+		" tdi  0, 0, 0x48;" /* (1) nop     [NA]; b (3) [OP]        */
+		" trap           ;" /* (2) trap    [NA]                    */
+		".long 0x1D05007C;" /* (3) tbegin. [OP]                    */
+		".long 0x0800E07F;" /* (4) trap    [OP]; nop   [NA]        */
+		" b %l[failure]  ;" /* (5) b [NA]; MSR.LE flipped (bug)    */
+		" b %l[success]  ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+		: : : : failure, success);
+
+failure:
+	success = false;
+	goto exit_from_ping;
+
+success:
+	success = true;
+
+exit_from_ping:
+	/* Tell pong() to exit before leaving */
+	pthread_kill(t1_pong, SIGUSR1);
+	return NULL;
+}
+
+void *pong(void *not_used)
+{
+	while (!exit_from_pong)
+		/*
+		 * Induce context switches on ping() thread
+		 * until ping() finishes its job and signs
+		 * to exit from this loop.
+		 */
+		sched_yield();
+
+	return NULL;
+}
+
+int tm_trap_test(void)
+{
+	uint16_t k = 1;
+
+	int rc;
+
+	pthread_attr_t attr;
+	cpu_set_t cpuset;
+
+	struct sigaction trap_sa;
+
+	trap_sa.sa_flags = SA_SIGINFO;
+	trap_sa.sa_sigaction = trap_signal_handler;
+	sigaction(SIGTRAP, &trap_sa, NULL);
+
+	struct sigaction usr1_sa;
+
+	usr1_sa.sa_flags = SA_SIGINFO;
+	usr1_sa.sa_sigaction = usr1_signal_handler;
+	sigaction(SIGUSR1, &usr1_sa, NULL);
+
+	/* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	/* Init pthread attribute */
+	rc = pthread_attr_init(&attr);
+	if (rc)
+		pr_error(rc, "pthread_attr_init()");
+
+	/*
+	 * Bind thread ping() and pong() both to CPU 0 so they ping-pong and
+	 * speed up context switches on ping() thread, speeding up the load_fp
+	 * and load_vec overflow.
+	 */
+	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	if (rc)
+		pr_error(rc, "pthread_attr_setaffinity()");
+
+	/* Figure out the machine endianness */
+	le = (int) *(uint8_t *)&k;
+
+	printf("%s machine detected. Checking if endianness flips %s",
+		le ? "Little-Endian" : "Big-Endian",
+		"inadvertently on trap in TM... ");
+
+	rc = fflush(0);
+	if (rc)
+		pr_error(rc, "fflush()");
+
+	/* Launch ping() */
+	rc = pthread_create(&t0_ping, &attr, ping, NULL);
+	if (rc)
+		pr_error(rc, "pthread_create()");
+
+	exit_from_pong = 0;
+
+	/* Launch pong() */
+	rc = pthread_create(&t1_pong, &attr, pong, NULL);
+	if (rc)
+		pr_error(rc, "pthread_create()");
+
+	rc = pthread_join(t0_ping, NULL);
+	if (rc)
+		pr_error(rc, "pthread_join()");
+
+	rc = pthread_join(t1_pong, NULL);
+	if (rc)
+		pr_error(rc, "pthread_join()");
+
+	if (success) {
+		printf("no.\n"); /* no, endianness did not flip inadvertently */
+		return EXIT_SUCCESS;
+	}
+
+	printf("yes!\n"); /* yes, endianness did flip inadvertently */
+	return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness(tm_trap_test, "tm_trap_test");
+}
-- 
cgit v1.2.3


From ef54cf0c600fb8f5737fb001a9e357edda1a1de8 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 17 Jan 2018 12:07:30 -0700
Subject: usbip: prevent bind loops on devices attached to vhci_hcd

usbip host binds to devices attached to vhci_hcd on the same server
when user does attach over localhost or specifies the server as the
remote.

usbip attach -r localhost -b busid
or
usbip attach -r servername (or server IP)

Unbind followed by bind works, however device is left in a bad state with
accesses via the attached busid result in errors and system hangs during
shutdown.

Fix it to check and bail out if the device is already attached to vhci_hcd.

Cc: stable@vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/src/usbip_bind.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c
index fa46141ae68b..e121cfb1746a 100644
--- a/tools/usb/usbip/src/usbip_bind.c
+++ b/tools/usb/usbip/src/usbip_bind.c
@@ -144,6 +144,7 @@ static int bind_device(char *busid)
 	int rc;
 	struct udev *udev;
 	struct udev_device *dev;
+	const char *devpath;
 
 	/* Check whether the device with this bus ID exists. */
 	udev = udev_new();
@@ -152,8 +153,16 @@ static int bind_device(char *busid)
 		err("device with the specified bus ID does not exist");
 		return -1;
 	}
+	devpath = udev_device_get_devpath(dev);
 	udev_unref(udev);
 
+	/* If the device is already attached to vhci_hcd - bail out */
+	if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+		err("bind loop detected: device: %s is attached to %s\n",
+		    devpath, USBIP_VHCI_DRV_NAME);
+		return -1;
+	}
+
 	rc = unbind_other(busid);
 	if (rc == UNBIND_ST_FAILED) {
 		err("could not unbind driver from device on busid %s", busid);
-- 
cgit v1.2.3


From ef824501f50846589f02173d73ce3fe6021a9d2a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 17 Jan 2018 12:08:03 -0700
Subject: usbip: list: don't list devices attached to vhci_hcd

usbip host lists devices attached to vhci_hcd on the same server
when user does attach over localhost or specifies the server as the
remote.

usbip attach -r localhost -b busid
or
usbip attach -r servername (or server IP)

Fix it to check and not list devices that are attached to vhci_hcd.

Cc: stable@vger.kernel.org
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/usb/usbip/src/usbip_list.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
index f1b38e866dd7..d65a9f444174 100644
--- a/tools/usb/usbip/src/usbip_list.c
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -187,6 +187,7 @@ static int list_devices(bool parsable)
 	const char *busid;
 	char product_name[128];
 	int ret = -1;
+	const char *devpath;
 
 	/* Create libudev context. */
 	udev = udev_new();
@@ -209,6 +210,14 @@ static int list_devices(bool parsable)
 		path = udev_list_entry_get_name(dev_list_entry);
 		dev = udev_device_new_from_syspath(udev, path);
 
+		/* Ignore devices attached to vhci_hcd */
+		devpath = udev_device_get_devpath(dev);
+		if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+			dbg("Skip the device %s already attached to %s\n",
+			    devpath, USBIP_VHCI_DRV_NAME);
+			continue;
+		}
+
 		/* Get device information. */
 		idVendor = udev_device_get_sysattr_value(dev, "idVendor");
 		idProduct = udev_device_get_sysattr_value(dev, "idProduct");
-- 
cgit v1.2.3


From cc08d1b91d1e0679d015bf7a99aedb28e2c89e12 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Thu, 18 Jan 2018 22:26:25 +0000
Subject: selftests/x86: Add <test_name>{,_32,_64} targets

One can only use `make all` or `make <test_name>_<bitness>`
as make targets.
`make <test_name>` doesn't work as Ingo noticed:
  x86> make test_vsyscall
  gcc -O2 -g -std=gnu99 -pthread -Wall -no-pie test_vsyscall.c -o test_vsyscall
  /tmp/aBaoo3nb.o: In function `init_vdso':
  test_vsyscall.c:68: undefined reference to `dlopen'
  test_vsyscall.c:76: undefined reference to `dlsym'
  test_vsyscall.c:80: undefined reference to `dlsym'
  test_vsyscall.c:84: undefined reference to `dlsym'
  test_vsyscall.c:88: undefined reference to `dlsym'
  test_vsyscall.c:70: undefined reference to `dlopen'
  collect2: error: ld returned 1 exit status
  <builtin>: recipe for target 'test_vsyscall' failed
  make: *** [test_vsyscall] Error 1

Makefile target substitution neither works :-/

Generate .PHONY targets per-test and fix target substitution.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: x86@kernel.org
Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/Makefile | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 939a337128db..e7324fbc76b1 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -27,14 +27,26 @@ UNAME_M := $(shell uname -m)
 CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
 CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
+$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
 TEST_PROGS += $(BINARIES_64)
+$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
 endif
 
 all_32: $(BINARIES_32)
-- 
cgit v1.2.3


From 0e9e327d1622c332e5b1db272d5c97af82164f68 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 18 Jan 2018 17:11:08 -0700
Subject: selftests: vm: update .gitignore with missing generated file

Add missing generated file to .gitignore.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/vm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 1ca2ee4d15b9..63c94d776e89 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -10,3 +10,4 @@ userfaultfd
 mlock-intersect-test
 mlock-random-test
 virtual_address_range
+gup_benchmark
-- 
cgit v1.2.3


From 35136920e100b85b15b2cfd1505453ba5b6c757f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 22 Jan 2018 22:10:59 -0800
Subject: tools/bpf: fix a test failure in selftests prog test_verifier

Commit 111e6b45315c ("selftests/bpf: make test_verifier run most programs")
enables tools/testing/selftests/bpf/test_verifier unit cases to run
via bpf_prog_test_run command. With the latest code base,
test_verifier had one test case failure:

  ...
  #473/p check deducing bounds from const, 2 FAIL retval 1 != 0
  0: (b7) r0 = 1
  1: (75) if r0 s>= 0x1 goto pc+1
   R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  2: (95) exit

  from 1 to 3: R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  3: (d5) if r0 s<= 0x1 goto pc+1
   R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  4: (95) exit

  from 3 to 5: R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  5: (1f) r1 -= r0
  6: (95) exit
  processed 7 insns (limit 131072), stack depth 0
  ...

The test case does not set return value in the test
structure and hence the return value from the prog run
is assumed to be 0. However, the actual return value is 1.
As a result, the test failed. The fix is to correctly set
the return value in the test structure.

Fixes: 111e6b45315c ("selftests/bpf: make test_verifier run most programs")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index fb82d29ee863..9e7075b268be 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8766,6 +8766,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"check deducing bounds from const, 3",
-- 
cgit v1.2.3


From 1a97cf1fe50340c5e758d7a74419d8f6e8b49ace Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 22 Jan 2018 17:46:57 -0800
Subject: selftests/bpf: speedup test_maps

test_hashmap_walk takes very long time on debug kernel with kasan on.
Reduce the number of iterations in this test without sacrificing
test coverage.
Also add printfs as progress indicator.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_maps.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 040356ecc862..f0d2f09898a3 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data)
 
 static void test_hashmap_walk(int task, void *data)
 {
-	int fd, i, max_entries = 100000;
+	int fd, i, max_entries = 1000;
 	long long key, value, next_key;
 	bool next_key_valid = true;
 
@@ -931,8 +931,12 @@ static void test_map_large(void)
 	close(fd);
 }
 
-static void run_parallel(int tasks, void (*fn)(int task, void *data),
-			 void *data)
+#define run_parallel(N, FN, DATA) \
+	printf("Fork %d tasks to '" #FN "'\n", N); \
+	__run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+			   void *data)
 {
 	pid_t pid[tasks];
 	int i;
@@ -972,7 +976,7 @@ static void test_map_stress(void)
 #define DO_UPDATE 1
 #define DO_DELETE 0
 
-static void do_work(int fn, void *data)
+static void test_update_delete(int fn, void *data)
 {
 	int do_update = ((int *)data)[1];
 	int fd = ((int *)data)[0];
@@ -1012,7 +1016,7 @@ static void test_map_parallel(void)
 	 */
 	data[0] = fd;
 	data[1] = DO_UPDATE;
-	run_parallel(TASKS, do_work, data);
+	run_parallel(TASKS, test_update_delete, data);
 
 	/* Check that key=0 is already there. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
@@ -1035,7 +1039,7 @@ static void test_map_parallel(void)
 
 	/* Now let's delete all elemenets in parallel. */
 	data[1] = DO_DELETE;
-	run_parallel(TASKS, do_work, data);
+	run_parallel(TASKS, test_update_delete, data);
 
 	/* Nothing should be left. */
 	key = -1;
-- 
cgit v1.2.3


From 8e6875250a1189e0d8db8b05e18abe63c2744521 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 22 Jan 2018 20:48:40 -0800
Subject: selftests/bpf: fix test_dev_cgroup

The test incorrectly doing
mkdir /mnt/cgroup-test-work-dirtest-bpf-based-device-cgroup
instead of
mkdir /mnt/cgroup-test-work-dir/test-bpf-based-device-cgroup

somehow such mkdir succeeds and new directory appears:
/mnt/cgroup-test-work-dir/cgroup-test-work-dirtest-bpf-based-device-cgroup

Later cleanup via nftw("/mnt/cgroup-test-work-dir", ...);
doesn't walk this directory.
"rmdir /mnt/cgroup-test-work-dir" succeeds, but bpf program and
dangling cgroup stays in memory.
That's a separate issue on a cgroup side.
For now fix the test.

Fixes: 37f1ba0909df ("selftests/bpf: add a test for device cgroup controller")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_dev_cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c1535b34f14f..3489cc283433 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -21,7 +21,7 @@
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
-#define TEST_CGROUP "test-bpf-based-device-cgroup/"
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
 
 int main(int argc, char **argv)
 {
-- 
cgit v1.2.3


From 783687810e986a15ffbf86c516a1a48ff37f38f7 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Tue, 23 Jan 2018 13:30:44 +0900
Subject: bpf: test_maps: cleanup sockmaps when test ends

Bug: BPF programs and maps related to sockmaps test exist
in memory even after test_maps ends.

This patch fixes it as a short term workaround (sockmap
kernel side needs real fixing) by empyting sockmaps when
test ends.

Fixes: 6f6d33f3b3d0f ("bpf: selftests add sockmap tests")
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
[ daniel: Note on workaround. ]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_maps.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index f0d2f09898a3..436c4c72414f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -463,7 +463,7 @@ static void test_devmap(int task, void *data)
 #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
 static void test_sockmap(int tasks, void *data)
 {
-	int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+	int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
 	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
 	int ports[] = {50200, 50201, 50202, 50204};
 	int err, i, fd, udp, sfd[6] = {0xdeadbeef};
@@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
-	/* Test map close sockets */
-	for (i = 0; i < 6; i++)
+	/* Test map close sockets and empty maps */
+	for (i = 0; i < 6; i++) {
+		bpf_map_delete_elem(map_fd_tx, &i);
+		bpf_map_delete_elem(map_fd_rx, &i);
 		close(sfd[i]);
+	}
 	close(fd);
 	close(map_fd_rx);
 	bpf_object__close(obj);
@@ -881,8 +884,13 @@ out:
 	printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
 	exit(1);
 out_sockmap:
-	for (i = 0; i < 6; i++)
+	for (i = 0; i < 6; i++) {
+		if (map_fd_tx)
+			bpf_map_delete_elem(map_fd_tx, &i);
+		if (map_fd_rx)
+			bpf_map_delete_elem(map_fd_rx, &i);
 		close(sfd[i]);
+	}
 	close(fd);
 	exit(1);
 }
-- 
cgit v1.2.3


From caf952288d715df32b2e70c64506849287844fe4 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 23 Jan 2018 11:22:53 -0800
Subject: selftests/bpf: add checks on extack messages for eBPF hw offload
 tests

Add checks to test that netlink extack messages are correctly displayed
in some expected error cases for eBPF offload to netdevsim with TC and
XDP.

iproute2 may be built without libmnl support, in which case the extack
messages will not be reported.  Try to detect this condition, and when
enountered print a mild warning to the user and skip the extack validation.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_offload.py | 112 +++++++++++++++++++++-------
 1 file changed, 86 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 833b9c1ec450..8dca4dc6c193 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -26,6 +26,7 @@ import time
 
 logfile = None
 log_level = 1
+skip_extack = False
 bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
 pp = pprint.PrettyPrinter()
 devs = [] # devices we created for clean up
@@ -132,7 +133,7 @@ def rm(f):
     if f in files:
         files.remove(f)
 
-def tool(name, args, flags, JSON=True, ns="", fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
     params = ""
     if JSON:
         params += "%s " % (flags["json"])
@@ -140,9 +141,20 @@ def tool(name, args, flags, JSON=True, ns="", fail=True):
     if ns != "":
         ns = "ip netns exec %s " % (ns)
 
-    ret, out = cmd(ns + name + " " + params + args, fail=fail)
-    if JSON and len(out.strip()) != 0:
-        return ret, json.loads(out)
+    if include_stderr:
+        ret, stdout, stderr = cmd(ns + name + " " + params + args,
+                                  fail=fail, include_stderr=True)
+    else:
+        ret, stdout = cmd(ns + name + " " + params + args,
+                          fail=fail, include_stderr=False)
+
+    if JSON and len(stdout.strip()) != 0:
+        out = json.loads(stdout)
+    else:
+        out = stdout
+
+    if include_stderr:
+        return ret, out, stderr
     else:
         return ret, out
 
@@ -181,13 +193,15 @@ def bpftool_map_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
 
-def ip(args, force=False, JSON=True, ns="", fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
     if force:
         args = "-force " + args
-    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
 
-def tc(args, JSON=True, ns="", fail=True):
-    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
 
 def ethtool(dev, opt, args, fail=True):
     return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -348,13 +362,17 @@ class NetdevSim:
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
 
-    def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
+    def set_xdp(self, bpf, mode, force=False, JSON=True,
+                fail=True, include_stderr=False):
         return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
-                  force=force, JSON=JSON, fail=fail)
+                  force=force, JSON=JSON,
+                  fail=fail, include_stderr=include_stderr)
 
-    def unset_xdp(self, mode, force=False, JSON=True, fail=True):
+    def unset_xdp(self, mode, force=False, JSON=True,
+                  fail=True, include_stderr=False):
         return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
-                  force=force, JSON=JSON, fail=fail)
+                  force=force, JSON=JSON,
+                  fail=fail, include_stderr=include_stderr)
 
     def ip_link_show(self, xdp):
         _, link = ip("link show dev %s" % (self['ifname']))
@@ -410,7 +428,7 @@ class NetdevSim:
         return filters
 
     def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
-                           fail=True):
+                           fail=True, include_stderr=False):
         params = ""
         if da:
             params += " da"
@@ -419,7 +437,8 @@ class NetdevSim:
         if skip_hw:
             params += " skip_hw"
         return tc("filter add dev %s ingress bpf %s %s" %
-                  (self['ifname'], bpf, params), fail=fail)
+                  (self['ifname'], bpf, params),
+                  fail=fail, include_stderr=include_stderr)
 
     def set_ethtool_tc_offloads(self, enable, fail=True):
         args = "hw-tc-offload %s" % ("on" if enable else "off")
@@ -491,6 +510,16 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
         fail("dev" not in m.keys(), "Device parameters not reported")
         fail(dev != m["dev"], "Map's device different than program's")
 
+def check_extack(output, reference, args):
+    if skip_extack:
+        return
+    lines = output.split("\n")
+    comp = len(lines) >= 2 and lines[1] == reference
+    fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+    check_extack(output, "Error: netdevsim: " + reference, args)
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -527,6 +556,14 @@ for s in samples:
     skip(ret != 0, "sample %s/%s not found, please compile it" %
          (bpf_test_dir, s))
 
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+                fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+    print("Warning: no extack message in iproute2 output, libmnl missing?")
+    log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+    skip_extack = True
+
 # Check if net namespaces seem to work
 ns = mknetns()
 skip(ns is None, "Could not create a net namespace")
@@ -558,8 +595,10 @@ try:
     sim.tc_flush_filters()
 
     start_test("Test TC offloads are off by default...")
-    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+                                         fail=False, include_stderr=True)
     fail(ret == 0, "TC filter loaded without enabling TC offloads")
+    check_extack(err, "Error: TC offload is disabled on net device.", args)
     sim.wait_for_flush()
 
     sim.set_ethtool_tc_offloads(True)
@@ -587,8 +626,11 @@ try:
     sim.dfs["bpf_tc_non_bound_accept"] = "N"
 
     start_test("Test TC cBPF unbound bytecode doesn't offload...")
-    ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False)
+    ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+                                         fail=False, include_stderr=True)
     fail(ret == 0, "TC bytecode loaded for offload")
+    check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+                      args)
     sim.wait_for_flush()
 
     start_test("Test TC offloads work...")
@@ -669,16 +711,24 @@ try:
          "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
-    ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
+    ret, _, err = sim.set_xdp(obj, "offload", force=True,
+                              fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    ret, _ = sim.set_xdp(obj, "", force=True, fail=False)
+    check_extack_nsim(err, "program loaded with different flags.", args)
+    ret, _, err = sim.set_xdp(obj, "", force=True,
+                              fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
+    check_extack_nsim(err, "program loaded with different flags.", args)
 
     start_test("Test XDP prog remove with bad flags...")
-    ret, _ = sim.unset_xdp("offload", force=True, fail=False)
+    ret, _, err = sim.unset_xdp("offload", force=True,
+                                fail=False, include_stderr=True)
     fail(ret == 0, "Removed program with a bad mode mode")
-    ret, _ = sim.unset_xdp("", force=True, fail=False)
+    check_extack_nsim(err, "program loaded with different flags.", args)
+    ret, _, err = sim.unset_xdp("", force=True,
+                                fail=False, include_stderr=True)
     fail(ret == 0, "Removed program with a bad mode mode")
+    check_extack_nsim(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
@@ -687,8 +737,9 @@ try:
     sim.unset_xdp("drv")
     bpftool_prog_list_wait(expected=0)
     sim.set_mtu(9000)
-    ret, _ = sim.set_xdp(obj, "drv", fail=False)
+    ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
     fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+    check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
     sim.set_mtu(1500)
 
     sim.wait_for_flush()
@@ -724,25 +775,32 @@ try:
     sim2.set_xdp(obj, "offload")
     pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
 
-    ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+    ret, _, err = sim.set_xdp(pinned, "offload",
+                              fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a different device accepted")
+    check_extack_nsim(err, "program bound to different dev.", args)
     sim2.remove()
-    ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+    ret, _, err = sim.set_xdp(pinned, "offload",
+                              fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a removed device accepted")
+    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
     rm(pin_file)
     bpftool_prog_list_wait(expected=0)
 
     start_test("Test mixing of TC and XDP...")
     sim.tc_add_ingress()
     sim.set_xdp(obj, "offload")
-    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+                                         fail=False, include_stderr=True)
     fail(ret == 0, "Loading TC when XDP active should fail")
+    check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
     sim.unset_xdp("offload")
     sim.wait_for_flush()
 
     sim.cls_bpf_add_filter(obj, skip_sw=True)
-    ret, _ = sim.set_xdp(obj, "offload", fail=False)
+    ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
     fail(ret == 0, "Loading XDP when TC active should fail")
+    check_extack_nsim(err, "TC program is already loaded.", args)
 
     start_test("Test binding TC from pinned...")
     pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
@@ -765,8 +823,10 @@ try:
 
     start_test("Test asking for TC offload of two filters...")
     sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
-    ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False)
+    ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
     fail(ret == 0, "Managed to offload two TC filters at the same time")
+    check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
 
     sim.tc_flush_filters(bound=2, total=2)
 
-- 
cgit v1.2.3


From 9045bdc8ed4e4e2c713d8d38bda9aa506b4bd8c5 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 23 Jan 2018 11:22:55 -0800
Subject: selftests/bpf: check bpf verifier log buffer usage works for HW
 offload

Make netdevsim print a message to the BPF verifier log buffer when a
program is offloaded.

Then use this message in hardware offload selftests to make sure that
using this buffer actually prints the message to the console for
eBPF hardware offload.

The message is appended after the last instruction is processed with the
verifying function from netdevsim. Output looks like the following:

    $ tc filter add dev foo ingress bpf obj sample_ret0.o \
        sec .text verbose skip_sw

    Prog section '.text' loaded (5)!
     - Type:         3
     - Instructions: 2 (0 over limit)
     - License:

    Verifier analysis:

    0: (b7) r0 = 0
    1: (95) exit
    [netdevsim] Hello from netdevsim!
    processed 2 insns, stack depth 0

"verbose" flag is required to see it in the console since netdevsim does
not throw an error after printing the message.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_offload.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 8dca4dc6c193..d128a16ee9a8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -362,8 +362,10 @@ class NetdevSim:
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
 
-    def set_xdp(self, bpf, mode, force=False, JSON=True,
+    def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
                 fail=True, include_stderr=False):
+        if verbose:
+            bpf += " verbose"
         return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
                   force=force, JSON=JSON,
                   fail=fail, include_stderr=include_stderr)
@@ -427,11 +429,13 @@ class NetdevSim:
                  (len(filters), expected))
         return filters
 
-    def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
-                           fail=True, include_stderr=False):
+    def cls_bpf_add_filter(self, bpf, da=False, verbose=False, skip_sw=False,
+                           skip_hw=False, fail=True, include_stderr=False):
         params = ""
         if da:
             params += " da"
+        if verbose:
+            params += " verbose"
         if skip_sw:
             params += " skip_sw"
         if skip_hw:
@@ -520,6 +524,13 @@ def check_extack(output, reference, args):
 def check_extack_nsim(output, reference, args):
     check_extack(output, "Error: netdevsim: " + reference, args)
 
+def check_verifier_log(output, reference):
+    lines = output.split("\n")
+    for l in reversed(lines):
+        if l == reference:
+            return
+    fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -634,8 +645,10 @@ try:
     sim.wait_for_flush()
 
     start_test("Test TC offloads work...")
-    ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+    ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
     fail(ret != 0, "TC filter did not load with TC offloads enabled")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test TC offload basics...")
     dfs = sim.dfs_get_bound_progs(expected=1)
@@ -744,12 +757,13 @@ try:
 
     sim.wait_for_flush()
     start_test("Test XDP offload...")
-    sim.set_xdp(obj, "offload")
+    _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
     link_xdp = ipl["xdp"]["prog"]
     progs = bpftool_prog_list(expected=1)
     prog = progs[0]
     fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test XDP offload is device bound...")
     dfs = sim.dfs_get_bound_progs(expected=1)
-- 
cgit v1.2.3


From 6d2d58f1b7ec379eb9467a5bc010ba49295d7714 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 23 Jan 2018 11:22:56 -0800
Subject: selftests/bpf: validate replace of TC filters is working

Daniel discovered recently I broke TC filter replace (and fixed
it in commit ad9294dbc227 ("bpf: fix cls_bpf on filter replace")).
Add a test to make sure it never happens again.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_offload.py | 55 ++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index d128a16ee9a8..ae3eea3ab820 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -429,8 +429,26 @@ class NetdevSim:
                  (len(filters), expected))
         return filters
 
-    def cls_bpf_add_filter(self, bpf, da=False, verbose=False, skip_sw=False,
-                           skip_hw=False, fail=True, include_stderr=False):
+    def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+                      cls="", params="",
+                      fail=True, include_stderr=False):
+        spec = ""
+        if prio is not None:
+            spec += " prio %d" % (prio)
+        if handle:
+            spec += " handle %s" % (handle)
+
+        return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+                  .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+                          cls=cls, params=params),
+                  fail=fail, include_stderr=include_stderr)
+
+    def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+                           da=False, verbose=False,
+                           skip_sw=False, skip_hw=False,
+                           fail=True, include_stderr=False):
+        cls = "bpf " + bpf
+
         params = ""
         if da:
             params += " da"
@@ -440,9 +458,10 @@ class NetdevSim:
             params += " skip_sw"
         if skip_hw:
             params += " skip_hw"
-        return tc("filter add dev %s ingress bpf %s %s" %
-                  (self['ifname'], bpf, params),
-                  fail=fail, include_stderr=include_stderr)
+
+        return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+                                  params=params,
+                                  fail=fail, include_stderr=include_stderr)
 
     def set_ethtool_tc_offloads(self, enable, fail=True):
         args = "hw-tc-offload %s" % ("on" if enable else "off")
@@ -644,6 +663,32 @@ try:
                       args)
     sim.wait_for_flush()
 
+    start_test("Test TC replace...")
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    start_test("Test TC replace bad flags...")
+    for i in range(3):
+        for j in range(3):
+            ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+                                            skip_sw=(j == 1), skip_hw=(j == 2),
+                                            fail=False)
+            fail(bool(ret) != bool(j),
+                 "Software TC incorrect load in replace test, iteration %d" %
+                 (j))
+        sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    sim.tc_flush_filters()
+
     start_test("Test TC offloads work...")
     ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
                                          fail=False, include_stderr=True)
-- 
cgit v1.2.3


From 31e95b61e172144bb2b626a291db1bdc0769275b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 23 Jan 2018 20:05:51 -0800
Subject: selftests/bpf: make 'dubious pointer arithmetic' test useful

mostly revert the previous workaround and make
'dubious pointer arithmetic' test useful again.
Use (ptr - ptr) << const instead of ptr << const to generate large scalar.
The rest stays as before commit 2b36047e7889.

Fixes: 2b36047e7889 ("selftests/bpf: fix test_align")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_align.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index e19b410125eb..ff8bd7e3e50c 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = {
 		.insns = {
 			PREP_PKT_POINTERS,
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-			/* ptr & const => unknown & const */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
-			/* ptr << const => unknown << const */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			/* (ptr - ptr) << 2 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
 			BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
 			/* We have a (4n) value.  Let's make a packet offset
 			 * out of it.  First add 14, to make it a (4n+2)
@@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
 		.matches = {
-			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
-			/* R5 bitwise operator &= on pointer prohibited */
+			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+			/* (ptr - ptr) << 2 == unknown, (4n) */
+			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			/* (4n) + 14 == (4n+2).  We blow our bounds, because
+			 * the add could overflow.
+			 */
+			{7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			/* Checked s>=0 */
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* packet pointer + nonnegative (4n+2) */
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+			 * We checked the bounds, but it might have been able
+			 * to overflow if the packet pointer started in the
+			 * upper half of the address space.
+			 * So we did not get a 'range' on R6, and the access
+			 * attempt will fail.
+			 */
+			{15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 		}
 	},
 	{
-- 
cgit v1.2.3


From d6d4f60c3a0933852dcc40a2142d93027ea1da76 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Thu, 25 Jan 2018 16:14:16 -0800
Subject: bpf: add selftest for tcpbpf

Added a selftest for tcpbpf (sock_ops) that checks that the appropriate
callbacks occured and that it can access tcp_sock fields and that their
values are correct.

Run with command: ./test_tcpbpf_user
Adding the flag "-d" will show why it did not pass.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h                 |  86 ++++++++++++++++-
 tools/testing/selftests/bpf/Makefile           |   4 +-
 tools/testing/selftests/bpf/bpf_helpers.h      |   2 +
 tools/testing/selftests/bpf/tcp_client.py      |  51 ++++++++++
 tools/testing/selftests/bpf/tcp_server.py      |  83 ++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf.h      |  16 ++++
 tools/testing/selftests/bpf/test_tcpbpf_kern.c | 118 +++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf_user.c | 126 +++++++++++++++++++++++++
 8 files changed, 480 insertions(+), 6 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/tcp_client.py
 create mode 100755 tools/testing/selftests/bpf/tcp_server.py
 create mode 100644 tools/testing/selftests/bpf/test_tcpbpf.h
 create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_kern.c
 create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index af1f49ad8b88..db6bdc375126 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -17,7 +17,7 @@
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
+#define BPF_DW		0x18	/* double word (64-bit) */
 #define BPF_XADD	0xc0	/* exclusive add */
 
 /* alu/jmp fields */
@@ -642,6 +642,14 @@ union bpf_attr {
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
 struct bpf_sock_ops {
 	__u32 op;
 	union {
-		__u32 reply;
-		__u32 replylong[4];
+		__u32 args[4];		/* Optionally passed to bpf program */
+		__u32 reply;		/* Returned by bpf program	    */
+		__u32 replylong[4];	/* Optionally returned by bpf prog  */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u32 lost_out;
+	__u32 sacked_out;
+	__u32 sk_txhash;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+							 * supported cb flags
+							 */
+
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
  */
@@ -1003,6 +1044,43 @@ enum {
 					 * a congestion threshold. RTTs above
 					 * this indicate congestion
 					 */
+	BPF_SOCK_OPS_RTO_CB,		/* Called when an RTO has triggered.
+					 * Arg1: value of icsk_retransmits
+					 * Arg2: value of icsk_rto
+					 * Arg3: whether RTO has expired
+					 */
+	BPF_SOCK_OPS_RETRANS_CB,	/* Called when skb is retransmitted.
+					 * Arg1: sequence number of 1st byte
+					 * Arg2: # segments
+					 * Arg3: return value of
+					 *       tcp_transmit_skb (0 => success)
+					 */
+	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
+					 * Arg1: old_state
+					 * Arg2: new_state
+					 */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+	BPF_TCP_ESTABLISHED = 1,
+	BPF_TCP_SYN_SENT,
+	BPF_TCP_SYN_RECV,
+	BPF_TCP_FIN_WAIT1,
+	BPF_TCP_FIN_WAIT2,
+	BPF_TCP_TIME_WAIT,
+	BPF_TCP_CLOSE,
+	BPF_TCP_CLOSE_WAIT,
+	BPF_TCP_LAST_ACK,
+	BPF_TCP_LISTEN,
+	BPF_TCP_CLOSING,	/* Now a valid state */
+	BPF_TCP_NEW_SYN_RECV,
+
+	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3a44b655d852..98688352208b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -14,13 +14,13 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i
 LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_align test_verifier_log test_dev_cgroup
+	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	sample_map_ret0.o
+	sample_map_ret0.o test_tcpbpf_kern.o
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 33cb00e46c49..dde2c11d7771 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
 static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
 			     int optlen) =
 	(void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+	(void *) BPF_FUNC_sock_ops_cb_flags_set;
 static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_sk_redirect_map;
 static int (*bpf_sock_map_update)(void *map, void *key, void *value,
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000000..481dccdf140c
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = ''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error), e: return ''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error), e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+    sock.connect((HostName, serverPort))
+except socket.error as e:
+    sys.exit(1)
+
+buf = ''
+n = 0
+while n < 1000:
+    buf += '+'
+    n += 1
+
+sock.settimeout(1);
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000000..bc454d7d0be2
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = ''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error), e: return ''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error), e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+    print 'bind fails: ', msg
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = ''
+n = 0
+while n < 500:
+    buf += '.'
+    n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+    readyRead, readyWrite, inError = \
+        select.select(readList, [], [], 2)
+
+    if len(readyRead) > 0:
+        waitCount = 0
+        for sock in readyRead:
+            if sock == serverSocket:
+                (clientSocket, address) = serverSocket.accept()
+                address = str(address[0])
+                readList.append(clientSocket)
+            else:
+                sock.settimeout(1);
+                s = read(sock, 1000)
+                n = send(sock, buf)
+                sock.close()
+                serverSocket.close()
+                sys.exit(0)
+    else:
+        print 'Select timeout!'
+        sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000000..2fe43289943c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+	__u32 event_map;
+	__u32 total_retrans;
+	__u32 data_segs_in;
+	__u32 data_segs_out;
+	__u32 bad_cb_test_rv;
+	__u32 good_cb_test_rv;
+	__u64 bytes_received;
+	__u64 bytes_acked;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000000..66bf71541903
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct tcpbpf_globals),
+	.max_entries = 2,
+};
+
+static inline void update_event_map(int event)
+{
+	__u32 key = 0;
+	struct tcpbpf_globals g, *gp;
+
+	gp = bpf_map_lookup_elem(&global_map, &key);
+	if (gp == NULL) {
+		struct tcpbpf_globals g = {0};
+
+		g.event_map |= (1 << event);
+		bpf_map_update_elem(&global_map, &key, &g,
+			    BPF_ANY);
+	} else {
+		g = *gp;
+		g.event_map |= (1 << event);
+		bpf_map_update_elem(&global_map, &key, &g,
+			    BPF_ANY);
+	}
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int bad_call_rv = 0;
+	int good_call_rv = 0;
+	int op;
+	int v = 0;
+
+	op = (int) skops->op;
+
+	update_event_map(op);
+
+	switch (op) {
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		/* Test failure to set largest cb flag (assumes not defined) */
+		bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+		/* Set callback */
+		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+						 BPF_SOCK_OPS_STATE_CB_FLAG);
+		/* Update results */
+		{
+			__u32 key = 0;
+			struct tcpbpf_globals g, *gp;
+
+			gp = bpf_map_lookup_elem(&global_map, &key);
+			if (!gp)
+				break;
+			g = *gp;
+			g.bad_cb_test_rv = bad_call_rv;
+			g.good_cb_test_rv = good_call_rv;
+			bpf_map_update_elem(&global_map, &key, &g,
+					    BPF_ANY);
+		}
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		/* Set callback */
+//		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+//						 BPF_SOCK_OPS_STATE_CB_FLAG);
+		skops->sk_txhash = 0x12345f;
+		v = 0xff;
+		rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+				    sizeof(v));
+		break;
+	case BPF_SOCK_OPS_RTO_CB:
+		break;
+	case BPF_SOCK_OPS_RETRANS_CB:
+		break;
+	case BPF_SOCK_OPS_STATE_CB:
+		if (skops->args[1] == BPF_TCP_CLOSE) {
+			__u32 key = 0;
+			struct tcpbpf_globals g, *gp;
+
+			gp = bpf_map_lookup_elem(&global_map, &key);
+			if (!gp)
+				break;
+			g = *gp;
+			g.total_retrans = skops->total_retrans;
+			g.data_segs_in = skops->data_segs_in;
+			g.data_segs_out = skops->data_segs_out;
+			g.bytes_received = skops->bytes_received;
+			g.bytes_acked = skops->bytes_acked;
+			bpf_map_update_elem(&global_map, &key, &g,
+					    BPF_ANY);
+		}
+		break;
+	default:
+		rv = -1;
+	}
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000000..95a370f3d378
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include <linux/perf_event.h>
+#include "test_tcpbpf.h"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+#define SYSTEM(CMD)						\
+	do {							\
+		if (system(CMD)) {				\
+			printf("system(%s) FAILS!\n", CMD);	\
+		}						\
+	} while (0)
+
+int main(int argc, char **argv)
+{
+	const char *file = "test_tcpbpf_kern.o";
+	struct tcpbpf_globals g = {0};
+	int cg_fd, prog_fd, map_fd;
+	bool debug_flag = false;
+	int error = EXIT_FAILURE;
+	struct bpf_object *obj;
+	char cmd[100], *dir;
+	struct stat buffer;
+	__u32 key = 0;
+	int pid;
+	int rv;
+
+	if (argc > 1 && strcmp(argv[1], "-d") == 0)
+		debug_flag = true;
+
+	dir = "/tmp/cgroupv2/foo";
+
+	if (stat(dir, &buffer) != 0) {
+		SYSTEM("mkdir -p /tmp/cgroupv2");
+		SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
+		SYSTEM("mkdir -p /tmp/cgroupv2/foo");
+	}
+	pid = (int) getpid();
+	sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid);
+	SYSTEM(cmd);
+
+	cg_fd = open(dir, O_DIRECTORY, O_RDONLY);
+	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+		printf("FAILED: load_bpf_file failed for: %s\n", file);
+		goto err;
+	}
+
+	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		goto err;
+	}
+
+	SYSTEM("./tcp_server.py");
+
+	map_fd = bpf_find_map(__func__, obj, "global_map");
+	if (map_fd < 0)
+		goto err;
+
+	rv = bpf_map_lookup_elem(map_fd, &key, &g);
+	if (rv != 0) {
+		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+		goto err;
+	}
+
+	if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
+	    g.data_segs_in != 1 || g.data_segs_out != 1 ||
+	    (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 ||
+		g.good_cb_test_rv != 0) {
+		printf("FAILED: Wrong stats\n");
+		if (debug_flag) {
+			printf("\n");
+			printf("bytes_received: %d (expecting 501)\n",
+			       (int)g.bytes_received);
+			printf("bytes_acked:    %d (expecting 1002)\n",
+			       (int)g.bytes_acked);
+			printf("data_segs_in:   %d (expecting 1)\n",
+			       g.data_segs_in);
+			printf("data_segs_out:  %d (expecting 1)\n",
+			       g.data_segs_out);
+			printf("event_map:      0x%x (at least 0x47e)\n",
+			       g.event_map);
+			printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n",
+			       g.bad_cb_test_rv);
+			printf("good_cb_test_rv:0x%x (expecting 0)\n",
+			       g.good_cb_test_rv);
+		}
+		goto err;
+	}
+	printf("PASSED!\n");
+	error = 0;
+err:
+	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	return error;
+
+}
-- 
cgit v1.2.3


From 2fb89a38d35bb6c935edb819d9096de455ce87cf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 25 Jan 2018 14:00:52 -0800
Subject: selftests/bpf: check for spurious extacks from the driver

Drivers should not report errors when offload is not forced.
Check stdout and stderr for familiar messages when with no
skip flags and with skip_hw.  Check for add, replace, and
destroy.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_offload.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index ae3eea3ab820..49f5ceeabfa6 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -543,6 +543,10 @@ def check_extack(output, reference, args):
 def check_extack_nsim(output, reference, args):
     check_extack(output, "Error: netdevsim: " + reference, args)
 
+def check_no_extack(res, needle):
+    fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+         "Found '%s' in command output, leaky extack?" % (needle))
+
 def check_verifier_log(output, reference):
     lines = output.split("\n")
     for l in reversed(lines):
@@ -550,6 +554,18 @@ def check_verifier_log(output, reference):
             return
     fail(True, "Missing or incorrect message from netdevsim in verifier log")
 
+def test_spurios_extack(sim, obj, skip_hw, needle):
+    res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+                                 include_stderr=True)
+    check_no_extack(res, needle)
+    res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+                                 skip_hw=skip_hw, include_stderr=True)
+    check_no_extack(res, needle)
+    res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+                            include_stderr=True)
+    check_no_extack(res, needle)
+
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -687,6 +703,17 @@ try:
                  (j))
         sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
 
+    start_test("Test spurious extack from the driver...")
+    test_spurios_extack(sim, obj, False, "netdevsim")
+    test_spurios_extack(sim, obj, True, "netdevsim")
+
+    sim.set_ethtool_tc_offloads(False)
+
+    test_spurios_extack(sim, obj, False, "TC offload is disabled")
+    test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+    sim.set_ethtool_tc_offloads(True)
+
     sim.tc_flush_filters()
 
     start_test("Test TC offloads work...")
-- 
cgit v1.2.3


From baf6a07e040d8308165654c7f49ee9ee18cd89be Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 25 Jan 2018 14:00:53 -0800
Subject: selftests/bpf: check for chain-non-0 extack message

Make sure netdevsim doesn't allow offload of chains other than 0,
and that it reports the expected extack message.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_offload.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 49f5ceeabfa6..e78aad0a68bb 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -430,13 +430,15 @@ class NetdevSim:
         return filters
 
     def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
-                      cls="", params="",
+                      chain=None, cls="", params="",
                       fail=True, include_stderr=False):
         spec = ""
         if prio is not None:
             spec += " prio %d" % (prio)
         if handle:
             spec += " handle %s" % (handle)
+        if chain is not None:
+            spec += " chain %d" % (chain)
 
         return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
                   .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
@@ -444,7 +446,7 @@ class NetdevSim:
                   fail=fail, include_stderr=include_stderr)
 
     def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
-                           da=False, verbose=False,
+                           chain=None, da=False, verbose=False,
                            skip_sw=False, skip_hw=False,
                            fail=True, include_stderr=False):
         cls = "bpf " + bpf
@@ -460,7 +462,7 @@ class NetdevSim:
             params += " skip_hw"
 
         return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
-                                  params=params,
+                                  chain=chain, params=params,
                                   fail=fail, include_stderr=include_stderr)
 
     def set_ethtool_tc_offloads(self, enable, fail=True):
@@ -679,6 +681,14 @@ try:
                       args)
     sim.wait_for_flush()
 
+    start_test("Test non-0 chain offload...")
+    ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+                                         skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "Offloaded a filter to chain other than 0")
+    check_extack(err, "Error: Driver supports only offload of chain 0.", args)
+    sim.tc_flush_filters()
+
     start_test("Test TC replace...")
     sim.cls_bpf_add_filter(obj, prio=1, handle=1)
     sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
-- 
cgit v1.2.3


From 771fc607e6b97be8f0cc7dfaa61173009c2214d4 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Fri, 26 Jan 2018 12:06:07 -0800
Subject: bpf: clean up from test_tcpbpf_kern.c

Removed commented lines from test_tcpbpf_kern.c

Fixes: d6d4f60c3a09 bpf: add selftest for tcpbpf
Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_tcpbpf_kern.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 66bf71541903..57119ad57a3f 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -79,9 +79,6 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		}
 		break;
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
-		/* Set callback */
-//		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
-//						 BPF_SOCK_OPS_STATE_CB_FLAG);
 		skops->sk_txhash = 0x12345f;
 		v = 0xff;
 		rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
-- 
cgit v1.2.3


From 21ccaf21497b72f42133182716a42dbf573d314b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 26 Jan 2018 23:33:48 +0100
Subject: bpf: add further test cases around div/mod and others

Update selftests to relfect recent changes and add various new
test cases.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 343 ++++++++++++++++++++++++++--
 1 file changed, 330 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 9e7075b268be..697bd83de295 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -21,6 +21,7 @@
 #include <stddef.h>
 #include <stdbool.h>
 #include <sched.h>
+#include <limits.h>
 
 #include <sys/capability.h>
 #include <sys/resource.h>
@@ -111,7 +112,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.retval = 0,
+		.retval = 42,
 	},
 	{
 		"DIV32 by 0, zero check 2",
@@ -123,7 +124,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.retval = 0,
+		.retval = 42,
 	},
 	{
 		"DIV64 by 0, zero check",
@@ -135,7 +136,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.retval = 0,
+		.retval = 42,
 	},
 	{
 		"MOD32 by 0, zero check 1",
@@ -147,7 +148,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.retval = 0,
+		.retval = 42,
 	},
 	{
 		"MOD32 by 0, zero check 2",
@@ -159,7 +160,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.retval = 0,
+		.retval = 42,
 	},
 	{
 		"MOD64 by 0, zero check",
@@ -171,13 +172,245 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"DIV32 by 0, zero check ok, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 2),
+			BPF_MOV32_IMM(BPF_REG_2, 16),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 8,
+	},
+	{
+		"DIV32 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 by 0, zero check 2, cls",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 by 0, zero check, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check ok, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 3),
+			BPF_MOV32_IMM(BPF_REG_2, 5),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"MOD32 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD32 by 0, zero check 2, cls",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD64 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 2),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"MOD64 by 0, zero check 2, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, -1),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = -1,
+	},
+	/* Just make sure that JITs used udiv/umod as otherwise we get
+	 * an exception from INT_MIN/-1 overflow similarly as with div
+	 * by zero.
+	 */
+	{
+		"DIV32 overflow, check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, -1),
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 overflow, check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 overflow, check 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, -1),
+			BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 overflow, check 2",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+			BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
 		.retval = 0,
 	},
+	{
+		"MOD32 overflow, check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, -1),
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = INT_MIN,
+	},
+	{
+		"MOD32 overflow, check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = INT_MIN,
+	},
+	{
+		"MOD64 overflow, check 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, -1),
+			BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD64 overflow, check 2",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"xor32 zero extend check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_2, -1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+			BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+			BPF_MOV32_IMM(BPF_REG_0, 2),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
 	{
 		"empty prog",
 		.insns = {
 		},
-		.errstr = "last insn is not an exit or jmp",
+		.errstr = "unknown opcode 00",
 		.result = REJECT,
 	},
 	{
@@ -374,7 +607,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "BPF_ARSH not supported for 32 bit ALU",
+		.errstr = "unknown opcode c4",
 	},
 	{
 		"arsh32 on reg",
@@ -385,7 +618,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "BPF_ARSH not supported for 32 bit ALU",
+		.errstr = "unknown opcode cc",
 	},
 	{
 		"arsh64 on imm",
@@ -501,7 +734,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_CALL uses reserved",
+		.errstr = "unknown opcode 8d",
 		.result = REJECT,
 	},
 	{
@@ -691,7 +924,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(0, 0, 0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid BPF_LD_IMM",
+		.errstr = "unknown opcode 00",
 		.result = REJECT,
 	},
 	{
@@ -709,7 +942,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(-1, 0, 0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid BPF_ALU opcode f0",
+		.errstr = "unknown opcode ff",
 		.result = REJECT,
 	},
 	{
@@ -718,7 +951,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(-1, -1, -1, -1, -1),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "invalid BPF_ALU opcode f0",
+		.errstr = "unknown opcode ff",
 		.result = REJECT,
 	},
 	{
@@ -7543,7 +7776,7 @@ static struct bpf_test tests[] = {
 			},
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_END uses reserved fields",
+		.errstr = "unknown opcode d7",
 		.result = REJECT,
 	},
 	{
@@ -8963,6 +9196,90 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.retval = 1,
 	},
+	{
+		"calls: div by 0 in subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(BPF_REG_2, 0),
+			BPF_MOV32_IMM(BPF_REG_3, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"calls: multiple ret types in subprog 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+	},
+	{
+		"calls: multiple ret types in subprog 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 16 },
+		.result = REJECT,
+		.errstr = "R0 min value is outside of the array range",
+	},
 	{
 		"calls: overlapping caller/callee",
 		.insns = {
-- 
cgit v1.2.3


From af32efeede9e188fefe0af51d117c31cf281de65 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Jan 2018 15:06:08 -0800
Subject: tools/bpf: add a multithreaded stress test in bpf selftests
 test_lpm_map

The new test will spawn four threads, doing map update, delete, lookup
and get_next_key in parallel. It is able to reproduce the issue in the
previous commit found by syzbot and Eric Dumazet.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile       |  2 +-
 tools/testing/selftests/bpf/test_lpm_map.c | 95 ++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 98688352208b..bf05bc5e36e5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf -lrt
+LDLIBS += -lcap -lelf -lrt -lpthread
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 081510853c6d..2be87e9ee28d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/bpf.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void)
 	close(map_fd);
 }
 
+#define MAX_TEST_KEYS	4
+struct lpm_mt_test_info {
+	int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+	int iter;
+	int map_fd;
+	struct {
+		__u32 prefixlen;
+		__u32 data;
+	} key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+	int i, j, ret, iter, key_size;
+	struct lpm_mt_test_info *info = arg;
+	struct bpf_lpm_trie_key *key_p;
+
+	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+	key_p = alloca(key_size);
+	for (iter = 0; iter < info->iter; iter++)
+		for (i = 0; i < MAX_TEST_KEYS; i++) {
+			/* first half of iterations in forward order,
+			 * and second half in backward order.
+			 */
+			j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+			key_p->prefixlen = info->key[j].prefixlen;
+			memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+			if (info->cmd == 0) {
+				__u32 value = j;
+				/* update must succeed */
+				assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+			} else if (info->cmd == 1) {
+				ret = bpf_map_delete_elem(info->map_fd, key_p);
+				assert(ret == 0 || errno == ENOENT);
+			} else if (info->cmd == 2) {
+				__u32 value;
+				ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+				assert(ret == 0 || errno == ENOENT);
+			} else {
+				struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+				ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+				assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+			}
+		}
+
+	// Pass successful exit info back to the main thread
+	pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+	info->iter = 2000;
+	info->map_fd = map_fd;
+	info->key[0].prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+	info->key[1].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+	info->key[2].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+	info->key[3].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+	struct lpm_mt_test_info info[4];
+	size_t key_size, value_size;
+	pthread_t thread_id[4];
+	int i, map_fd;
+	void *ret;
+
+	/* create a trie */
+	value_size = sizeof(__u32);
+	key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+				100, BPF_F_NO_PREALLOC);
+
+	/* create 4 threads to test update, delete, lookup and get_next_key */
+	setup_lpm_mt_test_info(&info[0], map_fd);
+	for (i = 0; i < 4; i++) {
+		if (i != 0)
+			memcpy(&info[i], &info[0], sizeof(info[i]));
+		info[i].cmd = i;
+		assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+	}
+
+	for (i = 0; i < 4; i++)
+		assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+	close(map_fd);
+}
+
 int main(void)
 {
 	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
@@ -667,6 +760,8 @@ int main(void)
 
 	test_lpm_get_next_key();
 
+	test_lpm_multi_thread();
+
 	printf("test_lpm: OK\n");
 	return 0;
 }
-- 
cgit v1.2.3


From 03aadf9499ba71ebc6059317fc409012c95ddcdb Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:31:58 +0300
Subject: NTB: ntb_test: Safely use paths with whitespace

If some of variables like LOC/REM or LOCAL_*/REMOTE_* got
whitespaces, the script may fail with syntax error.

Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem")
Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 62 ++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 31 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 5fc7ad359e21..a8647ad891eb 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -87,7 +87,7 @@ set -e
 
 function _modprobe()
 {
-        modprobe "$@"
+	modprobe "$@"
 
 	if [[ "$REMOTE_HOST" != "" ]]; then
 		ssh "$REMOTE_HOST" modprobe "$@"
@@ -274,13 +274,13 @@ function pingpong_test()
 
 	echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)"
 
-	LOC_START=$(read_file $LOC/count)
-	REM_START=$(read_file $REM/count)
+	LOC_START=$(read_file "$LOC/count")
+	REM_START=$(read_file "$REM/count")
 
 	sleep 7
 
-	LOC_END=$(read_file $LOC/count)
-	REM_END=$(read_file $REM/count)
+	LOC_END=$(read_file "$LOC/count")
+	REM_END=$(read_file "$REM/count")
 
 	if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then
 		echo "Ping pong counter not incrementing!" >&2
@@ -304,15 +304,15 @@ function perf_test()
 		max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA
 
 	echo "Running local perf test $WITH DMA"
-	write_file "" $LOCAL_PERF/run
+	write_file "" "$LOCAL_PERF/run"
 	echo -n "  "
-	read_file $LOCAL_PERF/run
+	read_file "$LOCAL_PERF/run"
 	echo "  Passed"
 
 	echo "Running remote perf test $WITH DMA"
-	write_file "" $REMOTE_PERF/run
+	write_file "" "$REMOTE_PERF/run"
 	echo -n "  "
-	read_file $REMOTE_PERF/run
+	read_file "$REMOTE_PERF/run"
 	echo "  Passed"
 
 	_modprobe -r ntb_perf
@@ -320,39 +320,39 @@ function perf_test()
 
 function ntb_tool_tests()
 {
-	LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV
-	REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV
+	LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV"
+	REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV"
 
 	echo "Starting ntb_tool tests..."
 
 	_modprobe ntb_tool
 
-	write_file Y $LOCAL_TOOL/link_event
-	write_file Y $REMOTE_TOOL/link_event
+	write_file "Y" "$LOCAL_TOOL/link_event"
+	write_file "Y" "$REMOTE_TOOL/link_event"
 
-	link_test $LOCAL_TOOL $REMOTE_TOOL
-	link_test $REMOTE_TOOL $LOCAL_TOOL
+	link_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+	link_test "$REMOTE_TOOL" "$LOCAL_TOOL"
 
 	#Ensure the link is up on both sides before continuing
-	write_file Y $LOCAL_TOOL/link_event
-	write_file Y $REMOTE_TOOL/link_event
+	write_file "Y" "$LOCAL_TOOL/link_event"
+	write_file "Y" "$REMOTE_TOOL/link_event"
 
-	for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do
+	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
-		write_file $MW_SIZE $LOCAL_TOOL/$PT
-		write_file $MW_SIZE $REMOTE_TOOL/$PT
+		write_file $MW_SIZE "$LOCAL_TOOL/$PT"
+		write_file $MW_SIZE "$REMOTE_TOOL/$PT"
 	done
 
-	doorbell_test $LOCAL_TOOL $REMOTE_TOOL
-	doorbell_test $REMOTE_TOOL $LOCAL_TOOL
-	scratchpad_test $LOCAL_TOOL $REMOTE_TOOL
-	scratchpad_test $REMOTE_TOOL $LOCAL_TOOL
+	doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+	doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+	scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+	scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL"
 
-	for MW in $(ls $LOCAL_TOOL/mw*); do
+	for MW in $(ls "$LOCAL_TOOL"/mw*); do
 		MW=$(basename $MW)
 
-		mw_test $MW $LOCAL_TOOL $REMOTE_TOOL
-		mw_test $MW $REMOTE_TOOL $LOCAL_TOOL
+		mw_test $MW "$LOCAL_TOOL" "$REMOTE_TOOL"
+		mw_test $MW "$REMOTE_TOOL" "$LOCAL_TOOL"
 	done
 
 	_modprobe -r ntb_tool
@@ -360,8 +360,8 @@ function ntb_tool_tests()
 
 function ntb_pingpong_tests()
 {
-	LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV
-	REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV
+	LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV"
+	REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV"
 
 	echo "Starting ntb_pingpong tests..."
 
@@ -374,8 +374,8 @@ function ntb_pingpong_tests()
 
 function ntb_perf_tests()
 {
-	LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV
-	REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV
+	LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
+	REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV"
 
 	echo "Starting ntb_perf tests..."
 
-- 
cgit v1.2.3


From dd151d7200ae6c5cfddbec7eb1a55ba8319e12b2 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:31:59 +0300
Subject: NTB: ntb_test: Add ntb_tool port tests

Multi-port interface is now available in ntb_tool driver. According
to the new NTB API, there might be more than two devices connected over
NTB. It means each device can have multiple freely enumerated ports.
Each port got index assigned by NTB hardware driver. This test is
performed to determine the local and peer ports as well as their indexes.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 52 +++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index a8647ad891eb..541ba70ad640 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -127,6 +127,56 @@ function write_file()
 	fi
 }
 
+function check_file()
+{
+	split_remote $1
+
+	if [[ "$REMOTE" != "" ]]; then
+		ssh "$REMOTE" "[[ -e ${VPATH} ]]"
+	else
+		[[ -e ${VPATH} ]]
+	fi
+}
+
+function find_pidx()
+{
+	PORT=$1
+	PPATH=$2
+
+	for ((i = 0; i < 64; i++)); do
+		PEER_DIR="$PPATH/peer$i"
+
+		check_file ${PEER_DIR} || break
+
+		PEER_PORT=$(read_file "${PEER_DIR}/port")
+		if [[ ${PORT} -eq $PEER_PORT ]]; then
+			echo $i
+			return 0
+		fi
+	done
+
+	return 1
+}
+
+function port_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running port tests on: $(basename $LOC) / $(basename $REM)"
+
+	LOCAL_PORT=$(read_file "$LOC/port")
+	REMOTE_PORT=$(read_file "$REM/port")
+
+	LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC")
+	REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM")
+
+	echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host"
+	echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host"
+
+	echo "  Passed"
+}
+
 function link_test()
 {
 	LOC=$1
@@ -327,6 +377,8 @@ function ntb_tool_tests()
 
 	_modprobe ntb_tool
 
+	port_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+
 	write_file "Y" "$LOCAL_TOOL/link_event"
 	write_file "Y" "$REMOTE_TOOL/link_event"
 
-- 
cgit v1.2.3


From 0298e5b9d0107a2c9d6e5ddf52ab90b405c3802d Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:00 +0300
Subject: NTB: ntb_test: Update ntb_tool link tests

Link Up and Down methods are used to change NTB link settings on
local side only for multi-port devices. Link is considered up
only if both sides local and peer set it up. Intel/AMD hardware
acts a bit different by assigning the Primary and Secondary roles,
so Primary device only is able to change the link state. Such behaviour
should be reflected in the test code.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 541ba70ad640..a3942f31c057 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -138,6 +138,11 @@ function check_file()
 	fi
 }
 
+function subdirname()
+{
+	echo $(basename $(dirname $1)) 2> /dev/null
+}
+
 function find_pidx()
 {
 	PORT=$1
@@ -183,9 +188,9 @@ function link_test()
 	REM=$2
 	EXP=0
 
-	echo "Running link tests on: $(basename $LOC) / $(basename $REM)"
+	echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)"
 
-	if ! write_file "N" "$LOC/link" 2> /dev/null; then
+	if ! write_file "N" "$LOC/../link" 2> /dev/null; then
 		echo "  Unsupported"
 		return
 	fi
@@ -193,12 +198,11 @@ function link_test()
 	write_file "N" "$LOC/link_event"
 
 	if [[ $(read_file "$REM/link") != "N" ]]; then
-		echo "Expected remote link to be down in $REM/link" >&2
+		echo "Expected link to be down in $REM/link" >&2
 		exit -1
 	fi
 
-	write_file "Y" "$LOC/link"
-	write_file "Y" "$LOC/link_event"
+	write_file "Y" "$LOC/../link"
 
 	echo "  Passed"
 }
@@ -379,15 +383,15 @@ function ntb_tool_tests()
 
 	port_test "$LOCAL_TOOL" "$REMOTE_TOOL"
 
-	write_file "Y" "$LOCAL_TOOL/link_event"
-	write_file "Y" "$REMOTE_TOOL/link_event"
+	LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX"
+	REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX"
 
-	link_test "$LOCAL_TOOL" "$REMOTE_TOOL"
-	link_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+	link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
 
 	#Ensure the link is up on both sides before continuing
-	write_file "Y" "$LOCAL_TOOL/link_event"
-	write_file "Y" "$REMOTE_TOOL/link_event"
+	write_file "Y" "$LOCAL_PEER_TOOL/link_event"
+	write_file "Y" "$REMOTE_PEER_TOOL/link_event"
 
 	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
-- 
cgit v1.2.3


From 4e1ef427f273504fd99556f6f60a1218563ab772 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:01 +0300
Subject: NTB: ntb_test: Update ntb_tool DB tests

DB interface of ntb_tool driver hasn't been changed much, but
db_valid_mask DebugFS file has still been added. In this case
it's much better to test all valid DB bits instead of using
the predefined mask, which may be incorrect in general.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index a3942f31c057..ec2e51183f8d 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -18,7 +18,6 @@ LIST_DEVS=FALSE
 
 DEBUGFS=${DEBUGFS-/sys/kernel/debug}
 
-DB_BITMASK=0x7FFF
 PERF_RUN_ORDER=32
 MAX_MW_SIZE=0
 RUN_DMA_TESTS=
@@ -39,7 +38,6 @@ function show_help()
 	echo "be highly recommended."
 	echo
 	echo "Options:"
-	echo "  -b BITMASK      doorbell clear bitmask for ntb_tool"
 	echo "  -C              don't cleanup ntb modules on exit"
 	echo "  -d              run dma tests"
 	echo "  -h              show this help message"
@@ -56,7 +54,6 @@ function parse_args()
 	OPTIND=0
 	while getopts "b:Cdhlm:r:p:w:" opt; do
 		case "$opt" in
-		b)  DB_BITMASK=${OPTARG} ;;
 		C)  DONT_CLEANUP=1 ;;
 		d)  RUN_DMA_TESTS=1 ;;
 		h)  show_help; exit 0 ;;
@@ -215,21 +212,30 @@ function doorbell_test()
 
 	echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
 
-	write_file "c $DB_BITMASK" "$REM/db"
+	DB_VALID_MASK=$(read_file "$LOC/db_valid_mask")
 
-	for ((i=1; i <= 8; i++)); do
-		let DB=$(read_file "$REM/db") || true
-		if [[ "$DB" != "$EXP" ]]; then
+	write_file "c $DB_VALID_MASK" "$REM/db"
+
+	for ((i = 0; i < 64; i++)); do
+		DB=$(read_file "$REM/db")
+		if [[ "$DB" -ne "$EXP" ]]; then
 			echo "Doorbell doesn't match expected value $EXP " \
 			     "in $REM/db" >&2
 			exit -1
 		fi
 
-		let "MASK=1 << ($i-1)" || true
-		let "EXP=$EXP | $MASK" || true
+		let "MASK = (1 << $i) & $DB_VALID_MASK" || true
+		let "EXP = $EXP | $MASK" || true
+
 		write_file "s $MASK" "$LOC/peer_db"
 	done
 
+	write_file "c $DB_VALID_MASK" "$REM/db_mask"
+	write_file $DB_VALID_MASK "$REM/db_event"
+	write_file "s $DB_VALID_MASK" "$REM/db_mask"
+
+	write_file "c $DB_VALID_MASK" "$REM/db"
+
 	echo "  Passed"
 }
 
@@ -393,14 +399,15 @@ function ntb_tool_tests()
 	write_file "Y" "$LOCAL_PEER_TOOL/link_event"
 	write_file "Y" "$REMOTE_PEER_TOOL/link_event"
 
+	doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+	doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+
 	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
 		write_file $MW_SIZE "$LOCAL_TOOL/$PT"
 		write_file $MW_SIZE "$REMOTE_TOOL/$PT"
 	done
 
-	doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
-	doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
 	scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL"
 	scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL"
 
-- 
cgit v1.2.3


From 06bd0407d06c4a8437401952caed4bc6bbba9e44 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:02 +0300
Subject: NTB: ntb_test: Update ntb_tool Scratchpad tests

Scratchpad NTB API has changed so has the ntb_tool driver. Outbound
Scratchpad DebugFS files have been moved to peer specific directories.
Each scratchpad is now available via separate file. The test code
has been accordingly altered.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 43 ++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index ec2e51183f8d..06b9b156a6fc 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -239,35 +239,44 @@ function doorbell_test()
 	echo "  Passed"
 }
 
-function read_spad()
+function get_files_count()
 {
-       VPATH=$1
-       IDX=$2
+	NAME=$1
+	LOC=$2
+
+	split_remote $LOC
 
-       ROW=($(read_file "$VPATH" | grep -e "^$IDX"))
-       let VAL=${ROW[1]} || true
-       echo $VAL
+	if [[ "$REMOTE" == "" ]]; then
+		echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l)
+	else
+		echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
+		       wc -l" 2> /dev/null)
+	fi
 }
 
 function scratchpad_test()
 {
 	LOC=$1
 	REM=$2
-	CNT=$(read_file "$LOC/spad" | wc -l)
 
-	echo "Running spad tests on: $(basename $LOC) / $(basename $REM)"
+	echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	CNT=$(get_files_count "spad" "$LOC")
+
+	if [[ $CNT -eq 0 ]]; then
+		echo "  Unsupported"
+		return
+	fi
 
 	for ((i = 0; i < $CNT; i++)); do
 		VAL=$RANDOM
-		write_file "$i $VAL" "$LOC/peer_spad"
-		RVAL=$(read_spad "$REM/spad" $i)
+		write_file "$VAL" "$LOC/spad$i"
+		RVAL=$(read_file "$REM/../spad$i")
 
-		if [[ "$VAL" != "$RVAL" ]]; then
-			echo "Scratchpad doesn't match expected value $VAL " \
-			     "in $REM/spad, got $RVAL" >&2
+		if [[ "$VAL" -ne "$RVAL" ]]; then
+			echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2
 			exit -1
 		fi
-
 	done
 
 	echo "  Passed"
@@ -402,15 +411,15 @@ function ntb_tool_tests()
 	doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
 	doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
 
+	scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
 	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
 		write_file $MW_SIZE "$LOCAL_TOOL/$PT"
 		write_file $MW_SIZE "$REMOTE_TOOL/$PT"
 	done
 
-	scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL"
-	scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL"
-
 	for MW in $(ls "$LOCAL_TOOL"/mw*); do
 		MW=$(basename $MW)
 
-- 
cgit v1.2.3


From e770112b2dfa38a9481309f0333d84e122c23e2d Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:03 +0300
Subject: NTB: ntb_test: Add ntb_tool Message tests

Messages NTB API is now available. ntb_tool driver has been altered
to perform messages send and receive operation. The test of messages
read/write to/from peer device has been added to the script.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 37 +++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 06b9b156a6fc..849191fd2308 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -282,6 +282,40 @@ function scratchpad_test()
 	echo "  Passed"
 }
 
+function message_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	CNT=$(get_files_count "msg" "$LOC")
+
+	if [[ $CNT -eq 0 ]]; then
+		echo "  Unsupported"
+		return
+	fi
+
+	MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits")
+	MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits")
+
+	write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts"
+	write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts"
+
+	for ((i = 0; i < $CNT; i++)); do
+		VAL=$RANDOM
+		write_file "$VAL" "$LOC/msg$i"
+		RVAL=$(read_file "$REM/../msg$i")
+
+		if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then
+			echo "Message $i value $RVAL doesn't match $VAL" >&2
+			exit -1
+		fi
+	done
+
+	echo "  Passed"
+}
+
 function write_mw()
 {
 	split_remote $2
@@ -414,6 +448,9 @@ function ntb_tool_tests()
 	scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
 	scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
 
+	message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
 	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
 		write_file $MW_SIZE "$LOCAL_TOOL/$PT"
-- 
cgit v1.2.3


From fa63be5428ef8756ec583aed0408ddd96109ff28 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:04 +0300
Subject: NTB: ntb_test: Update ntb_tool MW tests

There are devices (like IDT PCIe switches), which outbound MWs xlat address
is setup on peer side. In this case local side is supposed to allocate
a memory buffer and somehow deliver the xlat DMA address to peer so one
could set the outbound MW up. The MW test is altered so to support both
previous Intel/AMD and new IDT-like devices.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 89 +++++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 21 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 849191fd2308..2dea126c85eb 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -43,7 +43,9 @@ function show_help()
 	echo "  -h              show this help message"
 	echo "  -l              list available local and remote PCI ids"
 	echo "  -r REMOTE_HOST  specify the remote's hostname to connect"
-        echo "                  to for the test (using ssh)"
+	echo "                  to for the test (using ssh)"
+	echo "  -m MW_SIZE      memory window size for ntb_tool"
+	echo "                  (default: $MW_SIZE)"
 	echo "  -p NUM          ntb_perf run order (default: $PERF_RUN_ORDER)"
 	echo "  -w max_mw_size  maxmium memory window size"
 	echo
@@ -316,6 +318,32 @@ function message_test()
 	echo "  Passed"
 }
 
+function get_number()
+{
+	KEY=$1
+
+	sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p"
+}
+
+function mw_alloc()
+{
+	IDX=$1
+	LOC=$2
+	REM=$3
+
+	write_file $MW_SIZE "$LOC/mw_trans$IDX"
+
+	INB_MW=$(read_file "$LOC/mw_trans$IDX")
+	MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size")
+	MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address")
+
+	write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX"
+
+	if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then
+		echo "MW $IDX size aligned to $MW_ALIGNED_SIZE"
+	fi
+}
+
 function write_mw()
 {
 	split_remote $2
@@ -328,17 +356,15 @@ function write_mw()
 	fi
 }
 
-function mw_test()
+function mw_check()
 {
 	IDX=$1
 	LOC=$2
 	REM=$3
 
-	echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)"
-
-	write_mw "$LOC/$IDX"
+	write_mw "$LOC/mw$IDX"
 
-	split_remote "$LOC/$IDX"
+	split_remote "$LOC/mw$IDX"
 	if [[ "$REMOTE" == "" ]]; then
 		A=$VPATH
 	else
@@ -346,7 +372,7 @@ function mw_test()
 		ssh "$REMOTE" cat "$VPATH" > "$A"
 	fi
 
-	split_remote "$REM/peer_$IDX"
+	split_remote "$REM/peer_mw$IDX"
 	if [[ "$REMOTE" == "" ]]; then
 		B=$VPATH
 	else
@@ -354,7 +380,7 @@ function mw_test()
 		ssh "$REMOTE" cat "$VPATH" > "$B"
 	fi
 
-	cmp -n $MW_SIZE "$A" "$B"
+	cmp -n $MW_ALIGNED_SIZE "$A" "$B"
 	if [[ $? != 0 ]]; then
 		echo "Memory window $MW did not match!" >&2
 	fi
@@ -366,8 +392,39 @@ function mw_test()
 	if [[ "$B" == "/tmp/*" ]]; then
 		rm "$B"
 	fi
+}
+
+function mw_free()
+{
+	IDX=$1
+	LOC=$2
+	REM=$3
+
+	write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX"
+
+	write_file 0 "$LOC/mw_trans$IDX"
+}
+
+function mw_test()
+{
+	LOC=$1
+	REM=$2
+
+	CNT=$(get_files_count "mw_trans" "$LOC")
+
+	for ((i = 0; i < $CNT; i++)); do
+		echo "Running mw$i tests on: $(subdirname $LOC) / " \
+		     "$(subdirname $REM)"
+
+		mw_alloc $i $LOC $REM
+
+		mw_check $i $LOC $REM
+
+		mw_free $i $LOC  $REM
+
+		echo "  Passed"
+	done
 
-	echo "  Passed"
 }
 
 function pingpong_test()
@@ -451,18 +508,8 @@ function ntb_tool_tests()
 	message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
 	message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
 
-	for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do
-		PT=$(basename $PEER_TRANS)
-		write_file $MW_SIZE "$LOCAL_TOOL/$PT"
-		write_file $MW_SIZE "$REMOTE_TOOL/$PT"
-	done
-
-	for MW in $(ls "$LOCAL_TOOL"/mw*); do
-		MW=$(basename $MW)
-
-		mw_test $MW "$LOCAL_TOOL" "$REMOTE_TOOL"
-		mw_test $MW "$REMOTE_TOOL" "$LOCAL_TOOL"
-	done
+	mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
 
 	_modprobe -r ntb_tool
 }
-- 
cgit v1.2.3


From 4517a5701c1c6a30fddb619560589f970f633843 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 6 Dec 2017 17:32:05 +0300
Subject: NTB: ntb_test: Update ntb_perf tests

ntb_perf driver has been also updated so to have the multi-port
interface support. User now must specify what peer port is going
to be used to perform the test.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 2dea126c85eb..08cbfbbc7029 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -39,15 +39,16 @@ function show_help()
 	echo
 	echo "Options:"
 	echo "  -C              don't cleanup ntb modules on exit"
-	echo "  -d              run dma tests"
 	echo "  -h              show this help message"
 	echo "  -l              list available local and remote PCI ids"
 	echo "  -r REMOTE_HOST  specify the remote's hostname to connect"
 	echo "                  to for the test (using ssh)"
 	echo "  -m MW_SIZE      memory window size for ntb_tool"
 	echo "                  (default: $MW_SIZE)"
-	echo "  -p NUM          ntb_perf run order (default: $PERF_RUN_ORDER)"
-	echo "  -w max_mw_size  maxmium memory window size"
+	echo "  -d              run dma tests for ntb_perf"
+	echo "  -p ORDER        total data order for ntb_perf"
+	echo "                  (default: $PERF_RUN_ORDER)"
+	echo "  -w MAX_MW_SIZE  maxmium memory window size for ntb_perf"
 	echo
 }
 
@@ -460,17 +461,17 @@ function perf_test()
 		WITH="without"
 	fi
 
-	_modprobe ntb_perf run_order=$PERF_RUN_ORDER \
+	_modprobe ntb_perf total_order=$PERF_RUN_ORDER \
 		max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA
 
 	echo "Running local perf test $WITH DMA"
-	write_file "" "$LOCAL_PERF/run"
+	write_file "$LOCAL_PIDX" "$LOCAL_PERF/run"
 	echo -n "  "
 	read_file "$LOCAL_PERF/run"
 	echo "  Passed"
 
 	echo "Running remote perf test $WITH DMA"
-	write_file "" "$REMOTE_PERF/run"
+	write_file "$REMOTE_PIDX" "$REMOTE_PERF/run"
 	echo -n "  "
 	read_file "$REMOTE_PERF/run"
 	echo "  Passed"
-- 
cgit v1.2.3


From 30f1d370744cc35f26d78a1dd31aeb0e4be93c38 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 26 Jan 2018 01:36:39 +0200
Subject: tools/virtio: switch to __ptr_ring_empty

We don't rely on lockless guarantees, but it
seems cleaner than inverting __ptr_ring_peek.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/virtio/ringtest/ptr_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index e6e81305ef46..477899c12c51 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -187,7 +187,7 @@ bool enable_kick()
 
 bool avail_empty()
 {
-	return !__ptr_ring_peek(&array);
+	return __ptr_ring_empty(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
-- 
cgit v1.2.3


From 6dd42157830d875bdd3c6fefd05cbcf0875b51e8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 26 Jan 2018 01:36:40 +0200
Subject: tools/virtio: more stubs to fix tools build

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/virtio/linux/kernel.h      | 2 +-
 tools/virtio/linux/thread_info.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 tools/virtio/linux/thread_info.h

(limited to 'tools')

diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 395521a7a8d8..fca8381bbe04 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
 
 #define min(x, y) ({				\
 	typeof(x) _min1 = (x);			\
diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h
new file mode 100644
index 000000000000..e0f610d08006
--- /dev/null
+++ b/tools/virtio/linux/thread_info.h
@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)
-- 
cgit v1.2.3


From b4eab7de6685ee2691a7e297d511a126dbf53207 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 26 Jan 2018 01:36:42 +0200
Subject: tools/virtio: copy READ/WRITE_ONCE

This is to make ptr_ring test build again.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/virtio/ringtest/main.h | 57 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 5706e075adf2..593a3289c87d 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -134,4 +134,61 @@ static inline void busy_wait(void)
     barrier(); \
 } while (0)
 
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+        switch (size) {                                                 \
+        case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;              \
+        case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;            \
+        case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;            \
+        case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;            \
+        default:                                                        \
+                barrier();                                              \
+                __builtin_memcpy((void *)res, (const void *)p, size);   \
+                barrier();                                              \
+        }                                                               \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+#define READ_ONCE(x) \
+({									\
+	union { typeof(x) __val; char __c[1]; } __u;			\
+	__read_once_size(&(x), __u.__c, sizeof(x));		\
+	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+	__u.__val;							\
+})
+
+#define WRITE_ONCE(x, val) \
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
+
 #endif
-- 
cgit v1.2.3


From 491847f3b29cef0417a03142b96e2a6dea81cca0 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 26 Jan 2018 01:36:44 +0200
Subject: tools/virtio: fix smp_mb on x86

Offset 128 overlaps the last word of the redzone.
Use 132 which is always beyond that.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/virtio/ringtest/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 593a3289c87d..301d59bfcd0a 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -111,7 +111,7 @@ static inline void busy_wait(void)
 } 
 
 #if defined(__x86_64__) || defined(__i386__)
-#define smp_mb()     asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
-- 
cgit v1.2.3


From a845c7cf4b4cb5e9e3b2823867892b27646f3a98 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 29 Jan 2018 22:00:39 -0600
Subject: objtool: Improve retpoline alternative handling

Currently objtool requires all retpolines to be:

  a) patched in with alternatives; and

  b) annotated with ANNOTATE_NOSPEC_ALTERNATIVE.

If you forget to do both of the above, objtool segfaults trying to
dereference a NULL 'insn->call_dest' pointer.

Avoid that situation and print a more helpful error message:

  quirks.o: warning: objtool: efi_delete_dummy_variable()+0x99: unsupported intra-function call
  quirks.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.

Future improvements can be made to make objtool smarter with respect to
retpolines, but this is a good incremental improvement for now.

Reported-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/819e50b6d9c2e1a22e34c1a636c0b2057cc8c6e5.1517284349.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f40d46e24bcc..bc3490d929ff 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file)
 			dest_off = insn->offset + insn->len + insn->immediate;
 			insn->call_dest = find_symbol_by_offset(insn->sec,
 								dest_off);
-			/*
-			 * FIXME: Thanks to retpolines, it's now considered
-			 * normal for a function to call within itself.  So
-			 * disable this warning for now.
-			 */
-#if 0
-			if (!insn->call_dest) {
-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-					  insn->sec, insn->offset, dest_off);
+
+			if (!insn->call_dest && !insn->ignore) {
+				WARN_FUNC("unsupported intra-function call",
+					  insn->sec, insn->offset);
+				WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
 				return -1;
 			}
-#endif
+
 		} else if (rela->sym->type == STT_SECTION) {
 			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
 								rela->addend+4);
@@ -648,6 +644,8 @@ static int handle_group_alt(struct objtool_file *file,
 
 		last_new_insn = insn;
 
+		insn->ignore = orig_insn->ignore_alts;
+
 		if (insn->type != INSN_JUMP_CONDITIONAL &&
 		    insn->type != INSN_JUMP_UNCONDITIONAL)
 			continue;
@@ -729,10 +727,6 @@ static int add_special_section_alts(struct objtool_file *file)
 			goto out;
 		}
 
-		/* Ignore retpoline alternatives. */
-		if (orig_insn->ignore_alts)
-			continue;
-
 		new_insn = NULL;
 		if (!special_alt->group || special_alt->new_len) {
 			new_insn = find_insn(file, special_alt->new_sec,
@@ -1089,11 +1083,11 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	ret = add_call_destinations(file);
+	ret = add_special_section_alts(file);
 	if (ret)
 		return ret;
 
-	ret = add_special_section_alts(file);
+	ret = add_call_destinations(file);
 	if (ret)
 		return ret;
 
@@ -1720,10 +1714,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
 		insn->visited = true;
 
-		list_for_each_entry(alt, &insn->alts, list) {
-			ret = validate_branch(file, alt->insn, state);
-			if (ret)
-				return 1;
+		if (!insn->ignore_alts) {
+			list_for_each_entry(alt, &insn->alts, list) {
+				ret = validate_branch(file, alt->insn, state);
+				if (ret)
+					return 1;
+			}
 		}
 
 		switch (insn->type) {
-- 
cgit v1.2.3


From 17bc33914bcc98ba3c6b426fd1c49587a25c0597 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 29 Jan 2018 22:00:40 -0600
Subject: objtool: Add support for alternatives at the end of a section

Now that the previous patch gave objtool the ability to read retpoline
alternatives, it shows a new warning:

  arch/x86/entry/entry_64.o: warning: objtool: .entry_trampoline: don't know how to handle alternatives at end of section

This is due to the JMP_NOSPEC in entry_SYSCALL_64_trampoline().

Previously, objtool ignored this situation because it wasn't needed, and
it would have required a bit of extra code.  Now that this case exists,
add proper support for it.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2a30a3c2158af47d891a76e69bb1ef347e0443fd.1517284349.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 53 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index bc3490d929ff..9cd028aa1509 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -594,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file,
 			    struct instruction *orig_insn,
 			    struct instruction **new_insn)
 {
-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
 	unsigned long dest_off;
 
 	last_orig_insn = NULL;
@@ -610,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file,
 		last_orig_insn = insn;
 	}
 
-	if (!next_insn_same_sec(file, last_orig_insn)) {
-		WARN("%s: don't know how to handle alternatives at end of section",
-		     special_alt->orig_sec->name);
-		return -1;
-	}
-
-	fake_jump = malloc(sizeof(*fake_jump));
-	if (!fake_jump) {
-		WARN("malloc failed");
-		return -1;
+	if (next_insn_same_sec(file, last_orig_insn)) {
+		fake_jump = malloc(sizeof(*fake_jump));
+		if (!fake_jump) {
+			WARN("malloc failed");
+			return -1;
+		}
+		memset(fake_jump, 0, sizeof(*fake_jump));
+		INIT_LIST_HEAD(&fake_jump->alts);
+		clear_insn_state(&fake_jump->state);
+
+		fake_jump->sec = special_alt->new_sec;
+		fake_jump->offset = -1;
+		fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+		fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+		fake_jump->ignore = true;
 	}
-	memset(fake_jump, 0, sizeof(*fake_jump));
-	INIT_LIST_HEAD(&fake_jump->alts);
-	clear_insn_state(&fake_jump->state);
-
-	fake_jump->sec = special_alt->new_sec;
-	fake_jump->offset = -1;
-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-	fake_jump->ignore = true;
 
 	if (!special_alt->new_len) {
+		if (!fake_jump) {
+			WARN("%s: empty alternative at end of section",
+			     special_alt->orig_sec->name);
+			return -1;
+		}
+
 		*new_insn = fake_jump;
 		return 0;
 	}
@@ -654,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file,
 			continue;
 
 		dest_off = insn->offset + insn->len + insn->immediate;
-		if (dest_off == special_alt->new_off + special_alt->new_len)
+		if (dest_off == special_alt->new_off + special_alt->new_len) {
+			if (!fake_jump) {
+				WARN("%s: alternative jump to end of section",
+				     special_alt->orig_sec->name);
+				return -1;
+			}
 			insn->jump_dest = fake_jump;
+		}
 
 		if (!insn->jump_dest) {
 			WARN_FUNC("can't find alternative jump destination",
@@ -670,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file,
 		return -1;
 	}
 
-	list_add(&fake_jump->list, &last_new_insn->list);
+	if (fake_jump)
+		list_add(&fake_jump->list, &last_new_insn->list);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 830c1e3d16b2c1733cd1ec9c8f4d47a398ae31bc Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 29 Jan 2018 22:00:41 -0600
Subject: objtool: Warn on stripped section symbol

With the following fix:

  2a0098d70640 ("objtool: Fix seg fault with gold linker")

... a seg fault was avoided, but the original seg fault condition in
objtool wasn't fixed.  Replace the seg fault with an error message.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/dc4585a70d6b975c99fc51d1957ccdde7bd52f3a.1517284349.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/orc_gen.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index e61fe703197b..18384d9be4e1 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
 	struct orc_entry *orc;
 	struct rela *rela;
 
+	if (!insn_sec->sym) {
+		WARN("missing symbol for section %s", insn_sec->name);
+		return -1;
+	}
+
 	/* populate ORC data */
 	orc = (struct orc_entry *)u_sec->data->d_buf + idx;
 	memcpy(orc, o, sizeof(*orc));
-- 
cgit v1.2.3


From f229a55c31a7e12a15c7b4dcf9a97a9bf7a72ce8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 26 Oct 2017 04:48:01 +0300
Subject: virtio/ringtest: fix up need_event math

last kicked event index must be updated unconditionally:
even if we don't need to kick, we do not want to re-check
the same entry for events.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 tools/virtio/ringtest/ring.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
index 747c5dd47be8..2b9859beea65 100644
--- a/tools/virtio/ringtest/ring.c
+++ b/tools/virtio/ringtest/ring.c
@@ -188,16 +188,18 @@ bool enable_call()
 
 void kick_available(void)
 {
+	bool need;
+
 	/* Flush in previous flags write */
 	/* Barrier C (for pairing) */
 	smp_mb();
-	if (!need_event(event->kick_index,
-			guest.avail_idx,
-			guest.kicked_avail_idx))
-		return;
+	need = need_event(event->kick_index,
+			   guest.avail_idx,
+			   guest.kicked_avail_idx);
 
 	guest.kicked_avail_idx = guest.avail_idx;
-	kick();
+	if (need)
+		kick();
 }
 
 /* host side */
@@ -253,14 +255,18 @@ bool use_buf(unsigned *lenp, void **bufp)
 
 void call_used(void)
 {
+	bool need;
+
 	/* Flush in previous flags write */
 	/* Barrier D (for pairing) */
 	smp_mb();
-	if (!need_event(event->call_index,
+
+	need = need_event(event->call_index,
 			host.used_idx,
-			host.called_used_idx))
-		return;
+			host.called_used_idx);
 
 	host.called_used_idx = host.used_idx;
-	call();
+
+	if (need)
+		call();
 }
-- 
cgit v1.2.3


From a311650ae6ad0c169ade2583f78aa519878ea13f Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 27 Oct 2017 16:11:13 +0300
Subject: virtio/ringtest: virtio_ring: fix up need_event math

last kicked event index must be updated unconditionally:
even if we don't need to kick, we do not want to re-check
the same entry for events.

Reported-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
---
 tools/virtio/ringtest/virtio_ring_0_9.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
index bbc3043b2fb1..5fd3fbcb9e57 100644
--- a/tools/virtio/ringtest/virtio_ring_0_9.c
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -225,16 +225,18 @@ bool enable_call()
 
 void kick_available(void)
 {
+	bool need;
+
 	/* Flush in previous flags write */
 	/* Barrier C (for pairing) */
 	smp_mb();
-	if (!vring_need_event(vring_avail_event(&ring),
-			      guest.avail_idx,
-			      guest.kicked_avail_idx))
-		return;
+	need = vring_need_event(vring_avail_event(&ring),
+				guest.avail_idx,
+				guest.kicked_avail_idx);
 
 	guest.kicked_avail_idx = guest.avail_idx;
-	kick();
+	if (need)
+		kick();
 }
 
 /* host side */
@@ -316,14 +318,16 @@ bool use_buf(unsigned *lenp, void **bufp)
 
 void call_used(void)
 {
+	bool need;
+
 	/* Flush in previous flags write */
 	/* Barrier D (for pairing) */
 	smp_mb();
-	if (!vring_need_event(vring_used_event(&ring),
-			      host.used_idx,
-			      host.called_used_idx))
-		return;
+	need = vring_need_event(vring_used_event(&ring),
+				host.used_idx,
+				host.called_used_idx);
 
 	host.called_used_idx = host.used_idx;
-	call();
+	if (need)
+		call();
 }
-- 
cgit v1.2.3


From 235266b8e11c9db12497bdfc6d5e9100f3434c24 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 31 Jan 2018 16:16:59 -0800
Subject: selftests/vm: move 128TB mmap boundary test to generic directory

Architectures like PPC64 support mmap hint address based large address
space selection.  This test can be run on those architectures too.  Move
the test from the x86 selftests to selftest/vm so that other
architectures can use it too.

We also add a few new test scenarios in this patch.  We do test a few
boundary conditions before we do a high address mmap.  PPC64 uses the
address limit to validate the address in the fault path.  We had bugs in
this area w.r.t SLB fault handling before we updated the addess limit.

We also touch the allocated space to make sure we don't have any bugs in
the fault handling path.

[akpm@linux-foundation.org: restore tools/testing/selftests/vm/Makefile alpha ordering]
Link: http://lkml.kernel.org/r/20171123165226.32582-1-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile         |   5 +-
 tools/testing/selftests/vm/run_vmtests      |  11 ++
 tools/testing/selftests/vm/va_128TBswitch.c | 297 ++++++++++++++++++++++++++++
 tools/testing/selftests/x86/5lvl.c          | 177 -----------------
 4 files changed, 311 insertions(+), 179 deletions(-)
 create mode 100644 tools/testing/selftests/vm/va_128TBswitch.c
 delete mode 100644 tools/testing/selftests/x86/5lvl.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 7f45806bd863..fdefa2295ddc 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -8,17 +8,18 @@ endif
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += gup_benchmark
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
 TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
-TEST_GEN_FILES += gup_benchmark
 
 TEST_PROGS := run_vmtests
 
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index cc826326de87..d2561895a021 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -177,4 +177,15 @@ else
 	echo "[PASS]"
 fi
 
+echo "-----------------------------"
+echo "running virtual address 128TB switch test"
+echo "-----------------------------"
+./va_128TBswitch
+if [ $? -ne 0 ]; then
+    echo "[FAIL]"
+    exitcode=1
+else
+    echo "[PASS]"
+fi
+
 exit $exitcode
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
new file mode 100644
index 000000000000..e7fe734c374f
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#ifdef __powerpc64__
+#define PAGE_SIZE	(64 << 10)
+/*
+ * This will work with 16M and 2M hugepage size
+ */
+#define HUGETLB_SIZE	(16 << 20)
+#else
+#define PAGE_SIZE	(4 << 10)
+#define HUGETLB_SIZE	(2 << 20)
+#endif
+
+/*
+ * >= 128TB is the hint addr value we used to select
+ * large address space.
+ */
+#define ADDR_SWITCH_HINT (1UL << 47)
+#define LOW_ADDR	((void *) (1UL << 30))
+#define HIGH_ADDR	((void *) (1UL << 48))
+
+struct testcase {
+	void *addr;
+	unsigned long size;
+	unsigned long flags;
+	const char *msg;
+	unsigned int low_addr_required:1;
+	unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+	{
+		/*
+		 * If stack is moved, we could possibly allocate
+		 * this at the requested address.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+		.low_addr_required = 1,
+	},
+	{
+		/*
+		 * We should never allocate at the requested address or above it
+		 * The len cross the 128TB boundary. Without MAP_FIXED
+		 * we will always search in the lower address space.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))",
+		.low_addr_required = 1,
+	},
+	{
+		/*
+		 * Exact mapping at 128TB, the area is free we should get that
+		 * even without MAP_FIXED.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+	{
+		.addr = NULL,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+	},
+	{
+		.addr = (void *) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1) again",
+	},
+	{
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = ((void *)(ADDR_SWITCH_HINT)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+};
+
+static struct testcase hugetlb_testcases[] = {
+	{
+		.addr = NULL,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+	},
+	{
+		.addr = (void *) -1,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *) -1,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB) again",
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+		.size = 2 * HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)",
+	},
+};
+
+static int run_test(struct testcase *test, int count)
+{
+	void *p;
+	int i, ret = 0;
+
+	for (i = 0; i < count; i++) {
+		struct testcase *t = test + i;
+
+		p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
+
+		printf("%s: %p - ", t->msg, p);
+
+		if (p == MAP_FAILED) {
+			printf("FAILED\n");
+			ret = 1;
+			continue;
+		}
+
+		if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
+			printf("FAILED\n");
+			ret = 1;
+		} else {
+			/*
+			 * Do a dereference of the address returned so that we catch
+			 * bugs in page fault handling
+			 */
+			memset(p, 0, t->size);
+			printf("OK\n");
+		}
+		if (!t->keep_mapped)
+			munmap(p, t->size);
+	}
+
+	return ret;
+}
+
+static int supported_arch(void)
+{
+#if defined(__powerpc64__)
+	return 1;
+#elif defined(__x86_64__)
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	if (!supported_arch())
+		return 0;
+
+	ret = run_test(testcases, ARRAY_SIZE(testcases));
+	if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
+		ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases));
+	return ret;
+}
diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c
deleted file mode 100644
index 2eafdcd4c2b3..000000000000
--- a/tools/testing/selftests/x86/5lvl.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <stdio.h>
-#include <sys/mman.h>
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-#define PAGE_SIZE	4096
-#define LOW_ADDR	((void *) (1UL << 30))
-#define HIGH_ADDR	((void *) (1UL << 50))
-
-struct testcase {
-	void *addr;
-	unsigned long size;
-	unsigned long flags;
-	const char *msg;
-	unsigned int low_addr_required:1;
-	unsigned int keep_mapped:1;
-};
-
-static struct testcase testcases[] = {
-	{
-		.addr = NULL,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(NULL)",
-		.low_addr_required = 1,
-	},
-	{
-		.addr = LOW_ADDR,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(LOW_ADDR)",
-		.low_addr_required = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(HIGH_ADDR)",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(HIGH_ADDR) again",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
-	},
-	{
-		.addr = (void*) -1,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(-1)",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = (void*) -1,
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(-1) again",
-	},
-	{
-		.addr = (void *)((1UL << 47) - PAGE_SIZE),
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE)",
-		.low_addr_required = 1,
-		.keep_mapped = 1,
-	},
-	{
-		.addr = (void *)((1UL << 47) - PAGE_SIZE / 2),
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)",
-		.low_addr_required = 1,
-		.keep_mapped = 1,
-	},
-	{
-		.addr = (void *)((1UL << 47) - PAGE_SIZE),
-		.size = 2 * PAGE_SIZE,
-		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-		.msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)",
-	},
-	{
-		.addr = NULL,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(NULL, MAP_HUGETLB)",
-		.low_addr_required = 1,
-	},
-	{
-		.addr = LOW_ADDR,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
-		.low_addr_required = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = HIGH_ADDR,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
-	},
-	{
-		.addr = (void*) -1,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(-1, MAP_HUGETLB)",
-		.keep_mapped = 1,
-	},
-	{
-		.addr = (void*) -1,
-		.size = 2UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap(-1, MAP_HUGETLB) again",
-	},
-	{
-		.addr = (void *)((1UL << 47) - PAGE_SIZE),
-		.size = 4UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
-		.msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)",
-		.low_addr_required = 1,
-		.keep_mapped = 1,
-	},
-	{
-		.addr = (void *)((1UL << 47) - (2UL << 20)),
-		.size = 4UL << 20,
-		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
-		.msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)",
-	},
-};
-
-int main(int argc, char **argv)
-{
-	int i;
-	void *p;
-
-	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
-		struct testcase *t = testcases + i;
-
-		p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0);
-
-		printf("%s: %p - ", t->msg, p);
-
-		if (p == MAP_FAILED) {
-			printf("FAILED\n");
-			continue;
-		}
-
-		if (t->low_addr_required && p >= (void *)(1UL << 47))
-			printf("FAILED\n");
-		else
-			printf("OK\n");
-		if (!t->keep_mapped)
-			munmap(p, t->size);
-	}
-	return 0;
-}
-- 
cgit v1.2.3


From 724978457fe29626bb6ce78e6c324c9d2648dcf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Wed, 31 Jan 2018 16:19:32 -0800
Subject: memfd-test: test hugetlbfs sealing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove most of the special-casing of hugetlbfs now that sealing is
supported.

Link: http://lkml.kernel.org/r/20171107122800.25517-7-marcandre.lureau@redhat.com
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/memfd/memfd_test.c | 150 +++--------------------------
 1 file changed, 15 insertions(+), 135 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 132a54f74e88..59ca090e9752 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -513,6 +513,10 @@ static void mfd_assert_grow_write(int fd)
 	static char *buf;
 	ssize_t l;
 
+	/* hugetlbfs does not support write */
+	if (hugetlbfs_test)
+		return;
+
 	buf = malloc(mfd_def_size * 8);
 	if (!buf) {
 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
@@ -533,6 +537,10 @@ static void mfd_fail_grow_write(int fd)
 	static char *buf;
 	ssize_t l;
 
+	/* hugetlbfs does not support write */
+	if (hugetlbfs_test)
+		return;
+
 	buf = malloc(mfd_def_size * 8);
 	if (!buf) {
 		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
@@ -627,18 +635,13 @@ static void test_create(void)
 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
 	close(fd);
 
-	if (!hugetlbfs_test) {
-		/* verify MFD_ALLOW_SEALING is allowed */
-		fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
-		close(fd);
-
-		/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
-		fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
-		close(fd);
-	} else {
-		/* sealing is not supported on hugetlbfs */
-		mfd_fail_new("", MFD_ALLOW_SEALING);
-	}
+	/* verify MFD_ALLOW_SEALING is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+	close(fd);
+
+	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+	close(fd);
 }
 
 /*
@@ -649,10 +652,6 @@ static void test_basic(void)
 {
 	int fd;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s BASIC\n", MEMFD_STR);
 
 	fd = mfd_assert_new("kern_memfd_basic",
@@ -697,28 +696,6 @@ static void test_basic(void)
 	close(fd);
 }
 
-/*
- * hugetlbfs doesn't support seals or write, so just verify grow and shrink
- * on a hugetlbfs file created via memfd_create.
- */
-static void test_hugetlbfs_grow_shrink(void)
-{
-	int fd;
-
-	printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR);
-
-	fd = mfd_assert_new("kern_memfd_seal_write",
-			    mfd_def_size,
-			    MFD_CLOEXEC);
-
-	mfd_assert_read(fd);
-	mfd_assert_write(fd);
-	mfd_assert_shrink(fd);
-	mfd_assert_grow(fd);
-
-	close(fd);
-}
-
 /*
  * Test SEAL_WRITE
  * Test whether SEAL_WRITE actually prevents modifications.
@@ -727,13 +704,6 @@ static void test_seal_write(void)
 {
 	int fd;
 
-	/*
-	 * hugetlbfs does not contain sealing or write support.  Just test
-	 * basic grow and shrink via test_hugetlbfs_grow_shrink.
-	 */
-	if (hugetlbfs_test)
-		return test_hugetlbfs_grow_shrink();
-
 	printf("%s SEAL-WRITE\n", MEMFD_STR);
 
 	fd = mfd_assert_new("kern_memfd_seal_write",
@@ -760,10 +730,6 @@ static void test_seal_shrink(void)
 {
 	int fd;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s SEAL-SHRINK\n", MEMFD_STR);
 
 	fd = mfd_assert_new("kern_memfd_seal_shrink",
@@ -790,10 +756,6 @@ static void test_seal_grow(void)
 {
 	int fd;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s SEAL-GROW\n", MEMFD_STR);
 
 	fd = mfd_assert_new("kern_memfd_seal_grow",
@@ -820,10 +782,6 @@ static void test_seal_resize(void)
 {
 	int fd;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s SEAL-RESIZE\n", MEMFD_STR);
 
 	fd = mfd_assert_new("kern_memfd_seal_resize",
@@ -842,32 +800,6 @@ static void test_seal_resize(void)
 	close(fd);
 }
 
-/*
- * hugetlbfs does not support seals.  Basic test to dup the memfd created
- * fd and perform some basic operations on it.
- */
-static void hugetlbfs_dup(char *b_suffix)
-{
-	int fd, fd2;
-
-	printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix);
-
-	fd = mfd_assert_new("kern_memfd_share_dup",
-			    mfd_def_size,
-			    MFD_CLOEXEC);
-
-	fd2 = mfd_assert_dup(fd);
-
-	mfd_assert_read(fd);
-	mfd_assert_write(fd);
-
-	mfd_assert_shrink(fd2);
-	mfd_assert_grow(fd2);
-
-	close(fd2);
-	close(fd);
-}
-
 /*
  * Test sharing via dup()
  * Test that seals are shared between dupped FDs and they're all equal.
@@ -876,15 +808,6 @@ static void test_share_dup(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
-	/*
-	 * hugetlbfs does not contain sealing support.  Perform some
-	 * basic testing on dup'ed fd instead via hugetlbfs_dup.
-	 */
-	if (hugetlbfs_test) {
-		hugetlbfs_dup(b_suffix);
-		return;
-	}
-
 	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_dup",
@@ -927,10 +850,6 @@ static void test_share_mmap(char *banner, char *b_suffix)
 	int fd;
 	void *p;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s %s %s\n", MEMFD_STR,  banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_mmap",
@@ -955,32 +874,6 @@ static void test_share_mmap(char *banner, char *b_suffix)
 	close(fd);
 }
 
-/*
- * Basic test to make sure we can open the hugetlbfs fd via /proc and
- * perform some simple operations on it.
- */
-static void hugetlbfs_proc_open(char *b_suffix)
-{
-	int fd, fd2;
-
-	printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix);
-
-	fd = mfd_assert_new("kern_memfd_share_open",
-			    mfd_def_size,
-			    MFD_CLOEXEC);
-
-	fd2 = mfd_assert_open(fd, O_RDWR, 0);
-
-	mfd_assert_read(fd);
-	mfd_assert_write(fd);
-
-	mfd_assert_shrink(fd2);
-	mfd_assert_grow(fd2);
-
-	close(fd2);
-	close(fd);
-}
-
 /*
  * Test sealing with open(/proc/self/fd/%d)
  * Via /proc we can get access to a separate file-context for the same memfd.
@@ -991,15 +884,6 @@ static void test_share_open(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
-	/*
-	 * hugetlbfs does not contain sealing support.  So test basic
-	 * functionality of using /proc fd via hugetlbfs_proc_open
-	 */
-	if (hugetlbfs_test) {
-		hugetlbfs_proc_open(b_suffix);
-		return;
-	}
-
 	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_open",
@@ -1043,10 +927,6 @@ static void test_share_fork(char *banner, char *b_suffix)
 	int fd;
 	pid_t pid;
 
-	/* hugetlbfs does not contain sealing support */
-	if (hugetlbfs_test)
-		return;
-
 	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_fork",
-- 
cgit v1.2.3


From 3037aeb99134b0907fe0901055570a329b1f583d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Wed, 31 Jan 2018 16:19:36 -0800
Subject: memfd-test: add 'memfd-hugetlb:' prefix when testing hugetlbfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Link: http://lkml.kernel.org/r/20171107122800.25517-8-marcandre.lureau@redhat.com
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/memfd/memfd_test.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 59ca090e9752..910c55f858bb 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -20,6 +20,7 @@
 #include <unistd.h>
 
 #define MEMFD_STR	"memfd:"
+#define MEMFD_HUGE_STR	"memfd-hugetlb:"
 #define SHARED_FT_STR	"(shared file-table)"
 
 #define MFD_DEF_SIZE 8192
@@ -30,6 +31,7 @@
  */
 static int hugetlbfs_test;
 static size_t mfd_def_size = MFD_DEF_SIZE;
+static const char *memfd_str = MEMFD_STR;
 
 /*
  * Copied from mlock2-tests.c
@@ -606,7 +608,7 @@ static void test_create(void)
 	char buf[2048];
 	int fd;
 
-	printf("%s CREATE\n", MEMFD_STR);
+	printf("%s CREATE\n", memfd_str);
 
 	/* test NULL name */
 	mfd_fail_new(NULL, 0);
@@ -652,7 +654,7 @@ static void test_basic(void)
 {
 	int fd;
 
-	printf("%s BASIC\n", MEMFD_STR);
+	printf("%s BASIC\n", memfd_str);
 
 	fd = mfd_assert_new("kern_memfd_basic",
 			    mfd_def_size,
@@ -704,7 +706,7 @@ static void test_seal_write(void)
 {
 	int fd;
 
-	printf("%s SEAL-WRITE\n", MEMFD_STR);
+	printf("%s SEAL-WRITE\n", memfd_str);
 
 	fd = mfd_assert_new("kern_memfd_seal_write",
 			    mfd_def_size,
@@ -730,7 +732,7 @@ static void test_seal_shrink(void)
 {
 	int fd;
 
-	printf("%s SEAL-SHRINK\n", MEMFD_STR);
+	printf("%s SEAL-SHRINK\n", memfd_str);
 
 	fd = mfd_assert_new("kern_memfd_seal_shrink",
 			    mfd_def_size,
@@ -756,7 +758,7 @@ static void test_seal_grow(void)
 {
 	int fd;
 
-	printf("%s SEAL-GROW\n", MEMFD_STR);
+	printf("%s SEAL-GROW\n", memfd_str);
 
 	fd = mfd_assert_new("kern_memfd_seal_grow",
 			    mfd_def_size,
@@ -782,7 +784,7 @@ static void test_seal_resize(void)
 {
 	int fd;
 
-	printf("%s SEAL-RESIZE\n", MEMFD_STR);
+	printf("%s SEAL-RESIZE\n", memfd_str);
 
 	fd = mfd_assert_new("kern_memfd_seal_resize",
 			    mfd_def_size,
@@ -808,7 +810,7 @@ static void test_share_dup(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
-	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_dup",
 			    mfd_def_size,
@@ -850,7 +852,7 @@ static void test_share_mmap(char *banner, char *b_suffix)
 	int fd;
 	void *p;
 
-	printf("%s %s %s\n", MEMFD_STR,  banner, b_suffix);
+	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_mmap",
 			    mfd_def_size,
@@ -884,7 +886,7 @@ static void test_share_open(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
-	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_open",
 			    mfd_def_size,
@@ -927,7 +929,7 @@ static void test_share_fork(char *banner, char *b_suffix)
 	int fd;
 	pid_t pid;
 
-	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
 
 	fd = mfd_assert_new("kern_memfd_share_fork",
 			    mfd_def_size,
@@ -963,7 +965,11 @@ int main(int argc, char **argv)
 			}
 
 			hugetlbfs_test = 1;
+			memfd_str = MEMFD_HUGE_STR;
 			mfd_def_size = hpage_size * 2;
+		} else {
+			printf("Unknown option: %s\n", argv[1]);
+			abort();
 		}
 	}
 
-- 
cgit v1.2.3


From 29f34d1dd6657a0c7da875deb57775c67ff6bd86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Wed, 31 Jan 2018 16:19:40 -0800
Subject: memfd-test: move common code to a shared unit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memfd & fuse tests will share more common code in the following
commits to test hugetlb support.

Link: http://lkml.kernel.org/r/20171107122800.25517-9-marcandre.lureau@redhat.com
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/memfd/Makefile     |  5 ++++
 tools/testing/selftests/memfd/common.c     | 46 ++++++++++++++++++++++++++++++
 tools/testing/selftests/memfd/common.h     |  9 ++++++
 tools/testing/selftests/memfd/fuse_test.c  |  8 ++----
 tools/testing/selftests/memfd/memfd_test.c | 36 ++---------------------
 5 files changed, 64 insertions(+), 40 deletions(-)
 create mode 100644 tools/testing/selftests/memfd/common.c
 create mode 100644 tools/testing/selftests/memfd/common.h

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 3926a0409dda..a5276a91dfbf 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -12,3 +12,8 @@ fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 include ../lib.mk
 
 $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
+
+$(OUTPUT)/memfd_test: memfd_test.c common.o
+$(OUTPUT)/fuse_test: fuse_test.c common.o
+
+EXTRA_CLEAN = common.o
diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c
new file mode 100644
index 000000000000..8eb3d75f6e60
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "common.h"
+
+int hugetlbfs_test = 0;
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+	if (hugetlbfs_test)
+		flags |= MFD_HUGETLB;
+
+	return syscall(__NR_memfd_create, name, flags);
+}
diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h
new file mode 100644
index 000000000000..522d2c630bd8
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.h
@@ -0,0 +1,9 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+extern int hugetlbfs_test;
+
+unsigned long default_huge_page_size(void);
+int sys_memfd_create(const char *name, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 1ccb7a3eb14b..795a25ba8521 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -33,15 +33,11 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "common.h"
+
 #define MFD_DEF_SIZE 8192
 #define STACK_SIZE 65536
 
-static int sys_memfd_create(const char *name,
-			    unsigned int flags)
-{
-	return syscall(__NR_memfd_create, name, flags);
-}
-
 static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
 {
 	int r, fd;
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 910c55f858bb..10baa1652fc2 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -19,6 +19,8 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "common.h"
+
 #define MEMFD_STR	"memfd:"
 #define MEMFD_HUGE_STR	"memfd-hugetlb:"
 #define SHARED_FT_STR	"(shared file-table)"
@@ -29,43 +31,9 @@
 /*
  * Default is not to test hugetlbfs
  */
-static int hugetlbfs_test;
 static size_t mfd_def_size = MFD_DEF_SIZE;
 static const char *memfd_str = MEMFD_STR;
 
-/*
- * Copied from mlock2-tests.c
- */
-static unsigned long default_huge_page_size(void)
-{
-	unsigned long hps = 0;
-	char *line = NULL;
-	size_t linelen = 0;
-	FILE *f = fopen("/proc/meminfo", "r");
-
-	if (!f)
-		return 0;
-	while (getline(&line, &linelen, f) > 0) {
-		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
-			hps <<= 10;
-			break;
-		}
-	}
-
-	free(line);
-	fclose(f);
-	return hps;
-}
-
-static int sys_memfd_create(const char *name,
-			    unsigned int flags)
-{
-	if (hugetlbfs_test)
-		flags |= MFD_HUGETLB;
-
-	return syscall(__NR_memfd_create, name, flags);
-}
-
 static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
 {
 	int r, fd;
-- 
cgit v1.2.3


From c5c63835e5713b09fc974241db47956362a63efa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Wed, 31 Jan 2018 16:19:44 -0800
Subject: memfd-test: run fuse test on hugetlb backend memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Link: http://lkml.kernel.org/r/20171107122800.25517-10-marcandre.lureau@redhat.com
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/memfd/fuse_test.c      | 38 ++++++++++++++++++++------
 tools/testing/selftests/memfd/run_fuse_test.sh |  2 +-
 tools/testing/selftests/memfd/run_tests.sh     |  1 +
 3 files changed, 32 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 795a25ba8521..b018e835737d 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -38,6 +38,8 @@
 #define MFD_DEF_SIZE 8192
 #define STACK_SIZE 65536
 
+static size_t mfd_def_size = MFD_DEF_SIZE;
+
 static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
 {
 	int r, fd;
@@ -123,7 +125,7 @@ static void *mfd_assert_mmap_shared(int fd)
 	void *p;
 
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -141,7 +143,7 @@ static void *mfd_assert_mmap_private(int fd)
 	void *p;
 
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_PRIVATE,
 		 fd,
@@ -174,7 +176,7 @@ static int sealing_thread_fn(void *arg)
 	usleep(200000);
 
 	/* unmount mapping before sealing to avoid i_mmap_writable failures */
-	munmap(global_p, MFD_DEF_SIZE);
+	munmap(global_p, mfd_def_size);
 
 	/* Try sealing the global file; expect EBUSY or success. Current
 	 * kernels will never succeed, but in the future, kernels might
@@ -224,7 +226,7 @@ static void join_sealing_thread(pid_t pid)
 
 int main(int argc, char **argv)
 {
-	static const char zero[MFD_DEF_SIZE];
+	char *zero;
 	int fd, mfd, r;
 	void *p;
 	int was_sealed;
@@ -235,6 +237,25 @@ int main(int argc, char **argv)
 		abort();
 	}
 
+	if (argc >= 3) {
+		if (!strcmp(argv[2], "hugetlbfs")) {
+			unsigned long hpage_size = default_huge_page_size();
+
+			if (!hpage_size) {
+				printf("Unable to determine huge page size\n");
+				abort();
+			}
+
+			hugetlbfs_test = 1;
+			mfd_def_size = hpage_size * 2;
+		} else {
+			printf("Unknown option: %s\n", argv[2]);
+			abort();
+		}
+	}
+
+	zero = calloc(sizeof(*zero), mfd_def_size);
+
 	/* open FUSE memfd file for GUP testing */
 	printf("opening: %s\n", argv[1]);
 	fd = open(argv[1], O_RDONLY | O_CLOEXEC);
@@ -245,7 +266,7 @@ int main(int argc, char **argv)
 
 	/* create new memfd-object */
 	mfd = mfd_assert_new("kern_memfd_fuse",
-			     MFD_DEF_SIZE,
+			     mfd_def_size,
 			     MFD_CLOEXEC | MFD_ALLOW_SEALING);
 
 	/* mmap memfd-object for writing */
@@ -264,7 +285,7 @@ int main(int argc, char **argv)
 	 * This guarantees that the receive-buffer is pinned for 1s until the
 	 * data is written into it. The racing ADD_SEALS should thus fail as
 	 * the pages are still pinned. */
-	r = read(fd, p, MFD_DEF_SIZE);
+	r = read(fd, p, mfd_def_size);
 	if (r < 0) {
 		printf("read() failed: %m\n");
 		abort();
@@ -291,10 +312,10 @@ int main(int argc, char **argv)
 	 * enough to avoid any in-flight writes. */
 
 	p = mfd_assert_mmap_private(mfd);
-	if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
+	if (was_sealed && memcmp(p, zero, mfd_def_size)) {
 		printf("memfd sealed during read() but data not discarded\n");
 		abort();
-	} else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
+	} else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
 		printf("memfd sealed after read() but data discarded\n");
 		abort();
 	}
@@ -303,6 +324,7 @@ int main(int argc, char **argv)
 	close(fd);
 
 	printf("fuse: DONE\n");
+	free(zero);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
index 407df68dfe27..22e572e2d66a 100755
--- a/tools/testing/selftests/memfd/run_fuse_test.sh
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -10,6 +10,6 @@ set -e
 
 mkdir mnt
 ./fuse_mnt ./mnt
-./fuse_test ./mnt/memfd
+./fuse_test ./mnt/memfd $@
 fusermount -u ./mnt
 rmdir ./mnt
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
index daabb350697c..c2d41ed81b24 100755
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ b/tools/testing/selftests/memfd/run_tests.sh
@@ -60,6 +60,7 @@ fi
 # Run the hugetlbfs test
 #
 ./memfd_test hugetlbfs
+./run_fuse_test.sh hugetlbfs
 
 #
 # Give back any huge pages allocated for the test
-- 
cgit v1.2.3


From c7905f200225d4257536f19b11d18f598fee5f44 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 31 Jan 2018 16:21:23 -0800
Subject: tools, vm: new option to specify kpageflags file

page-types currently hardcodes /proc/kpageflags as the file to parse.
This works when using the tool to examine the state of pageflags on the
same system, but does not allow storing a snapshot of pageflags at a
given time to debug issues nor on a different system.

This allows the user to specify a saved version of kpageflags with a new
page-types -F option.

[akpm@linux-foundation.org: add "filename" to fix usage() string]
[rientjes@google.com: fix layout]
  Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1801301840050.140969@chino.kir.corp.google.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1801301458180.153857@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/vm/page-types.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index e92903fc7113..a8783f48f77f 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -169,9 +169,10 @@ static int		opt_raw;	/* for kernel developers */
 static int		opt_list;	/* list pages (in ranges) */
 static int		opt_no_summary;	/* don't show summary */
 static pid_t		opt_pid;	/* process to walk */
-const char *		opt_file;	/* file or directory path */
+const char		*opt_file;	/* file or directory path */
 static uint64_t		opt_cgroup;	/* cgroup inode */
 static int		opt_list_cgroup;/* list page cgroup */
+static const char	*opt_kpageflags;/* kpageflags file to parse */
 
 #define MAX_ADDR_RANGES	1024
 static int		nr_addr_ranges;
@@ -258,7 +259,7 @@ static int checked_open(const char *pathname, int flags)
  * pagemap/kpageflags routines
  */
 
-static unsigned long do_u64_read(int fd, char *name,
+static unsigned long do_u64_read(int fd, const char *name,
 				 uint64_t *buf,
 				 unsigned long index,
 				 unsigned long count)
@@ -283,7 +284,7 @@ static unsigned long kpageflags_read(uint64_t *buf,
 				     unsigned long index,
 				     unsigned long pages)
 {
-	return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
+	return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages);
 }
 
 static unsigned long kpagecgroup_read(uint64_t *buf,
@@ -293,7 +294,7 @@ static unsigned long kpagecgroup_read(uint64_t *buf,
 	if (kpagecgroup_fd < 0)
 		return pages;
 
-	return do_u64_read(kpagecgroup_fd, PROC_KPAGEFLAGS, buf, index, pages);
+	return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages);
 }
 
 static unsigned long pagemap_read(uint64_t *buf,
@@ -743,7 +744,7 @@ static void walk_addr_ranges(void)
 {
 	int i;
 
-	kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+	kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY);
 
 	if (!nr_addr_ranges)
 		add_addr_range(0, ULONG_MAX);
@@ -790,6 +791,7 @@ static void usage(void)
 "            -N|--no-summary            Don't show summary info\n"
 "            -X|--hwpoison              hwpoison pages\n"
 "            -x|--unpoison              unpoison pages\n"
+"            -F|--kpageflags filename   kpageflags file to parse\n"
 "            -h|--help                  Show this usage message\n"
 "flags:\n"
 "            0x10                       bitfield format, e.g.\n"
@@ -1013,7 +1015,7 @@ static void walk_page_cache(void)
 {
 	struct stat st;
 
-	kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+	kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY);
 	pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
 	sigaction(SIGBUS, &sigbus_action, NULL);
 
@@ -1164,6 +1166,11 @@ static void parse_bits_mask(const char *optarg)
 	add_bits_filter(mask, bits);
 }
 
+static void parse_kpageflags(const char *name)
+{
+	opt_kpageflags = name;
+}
+
 static void describe_flags(const char *optarg)
 {
 	uint64_t flags = parse_flag_names(optarg, 0);
@@ -1188,6 +1195,7 @@ static const struct option opts[] = {
 	{ "no-summary", 0, NULL, 'N' },
 	{ "hwpoison"  , 0, NULL, 'X' },
 	{ "unpoison"  , 0, NULL, 'x' },
+	{ "kpageflags", 0, NULL, 'F' },
 	{ "help"      , 0, NULL, 'h' },
 	{ NULL        , 0, NULL, 0 }
 };
@@ -1199,7 +1207,7 @@ int main(int argc, char *argv[])
 	page_size = getpagesize();
 
 	while ((c = getopt_long(argc, argv,
-				"rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) {
+				"rp:f:a:b:d:c:ClLNXxF:h", opts, NULL)) != -1) {
 		switch (c) {
 		case 'r':
 			opt_raw = 1;
@@ -1242,6 +1250,9 @@ int main(int argc, char *argv[])
 			opt_unpoison = 1;
 			prepare_hwpoison_fd();
 			break;
+		case 'F':
+			parse_kpageflags(optarg);
+			break;
 		case 'h':
 			usage();
 			exit(0);
@@ -1251,6 +1262,9 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (!opt_kpageflags)
+		opt_kpageflags = PROC_KPAGEFLAGS;
+
 	if (opt_cgroup || opt_list_cgroup)
 		kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY);
 
-- 
cgit v1.2.3


From 62a06994ced17f295fc51ea0815580ee7ccb668d Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 29 Jan 2018 21:23:28 -0800
Subject: tools/bpf: permit selftests/bpf to be built in a different directory

Fix a couple of issues at tools/testing/selftests/bpf/Makefile so
the following command
   make -C tools/testing/selftests/bpf OUTPUT=/home/yhs/tmp
can put the built results into a different directory.

Also add the built binary test_tcpbpf_user in the .gitignore file.

Fixes: 6882804c916b ("selftests/bpf: add a test for overlapping packet range checks")
Fixes: 9d1f15941967 ("bpf: move cgroup_helpers from samples/bpf/ to tools/testing/selftesting/bpf/")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 tools/testing/selftests/bpf/Makefile   | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1e09d77f1948..cc15af2e54fe 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -8,5 +8,6 @@ fixdep
 test_align
 test_dev_cgroup
 test_progs
+test_tcpbpf_user
 test_verifier_log
 feature
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bf05bc5e36e5..566d6adc172a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,7 @@ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
@@ -58,7 +58,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
-%.o: %.c
+$(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
 	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
-- 
cgit v1.2.3


From 03ee47ae8a7c608975be7e45287bff0482e295d6 Mon Sep 17 00:00:00 2001
From: Peter Malone <peter.malone@gmail.com>
Date: Thu, 16 Feb 2017 15:42:26 -0500
Subject: ringtest: ring.c malloc & memset to calloc

Code cleanup change - moving from malloc & memset to calloc.

Signed-off-by: Peter Malone <peter.malone@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/ringtest/ring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
index 2b9859beea65..5a41404aaef5 100644
--- a/tools/virtio/ringtest/ring.c
+++ b/tools/virtio/ringtest/ring.c
@@ -84,12 +84,11 @@ void alloc_ring(void)
 		perror("Unable to allocate ring buffer.\n");
 		exit(3);
 	}
-	event = malloc(sizeof *event);
+	event = calloc(1, sizeof(*event));
 	if (!event) {
 		perror("Unable to allocate event buffer.\n");
 		exit(3);
 	}
-	memset(event, 0, sizeof *event);
 	guest.avail_idx = 0;
 	guest.kicked_avail_idx = -1;
 	guest.last_used_idx = 0;
@@ -102,12 +101,11 @@ void alloc_ring(void)
 		ring[i] = desc;
 	}
 	guest.num_free = ring_size;
-	data = malloc(ring_size * sizeof *data);
+	data = calloc(ring_size, sizeof(*data));
 	if (!data) {
 		perror("Unable to allocate data buffer.\n");
 		exit(3);
 	}
-	memset(data, 0, ring_size * sizeof *data);
 }
 
 /* guest side */
-- 
cgit v1.2.3


From 65073a67331de3d2cce35607807ddec284e75e81 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 31 Jan 2018 12:58:56 +0100
Subject: bpf: fix null pointer deref in bpf_prog_test_run_xdp

syzkaller was able to generate the following XDP program ...

  (18) r0 = 0x0
  (61) r5 = *(u32 *)(r1 +12)
  (04) (u32) r0 += (u32) 0
  (95) exit

... and trigger a NULL pointer dereference in ___bpf_prog_run()
via bpf_prog_test_run_xdp() where this was attempted to run.

Reason is that recent xdp_rxq_info addition to XDP programs
updated all drivers, but not bpf_prog_test_run_xdp(), where
xdp_buff is set up. Thus when context rewriter does the deref
on the netdev it's NULL at runtime. Fix it by using xdp_rxq
from loopback dev. __netif_get_rx_queue() helper can also be
reused in various other locations later on.

Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs")
Reported-by: syzbot+1eb094057b338eb1fc00@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 697bd83de295..c0f16e93f9bd 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -7779,6 +7779,20 @@ static struct bpf_test tests[] = {
 		.errstr = "unknown opcode d7",
 		.result = REJECT,
 	},
+	{
+		"XDP, using ifindex from netdev",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, ingress_ifindex)),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.retval = 1,
+	},
 	{
 		"meta access, test1",
 		.insns = {
-- 
cgit v1.2.3


From f81e1d35a6e36d30888c46283b8dd1022e847124 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 31 Jan 2018 12:45:55 -0700
Subject: nfit-test: Add platform cap support from ACPI 6.2a to test

Adding NFIT platform capabilities sub table in nfit_test simulated ACPI
NFIT table. Only the first NFIT table is added with the capability
sub-table.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 7217b2b953b5..de1373a7ed4f 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -881,7 +881,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 					window_size) * NUM_DCR
 			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
 			+ (sizeof(struct acpi_nfit_flush_address)
-					+ sizeof(u64) * NUM_HINTS) * NUM_DCR;
+					+ sizeof(u64) * NUM_HINTS) * NUM_DCR
+			+ sizeof(struct acpi_nfit_capabilities);
 	int i;
 
 	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -993,6 +994,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
+	struct acpi_nfit_capabilities *pcap;
 	unsigned int offset, i;
 
 	/*
@@ -1500,8 +1502,16 @@ static void nfit_test0_setup(struct nfit_test *t)
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
 
+	/* platform capabilities */
+	pcap = nfit_buf + offset + flush_hint_size * 4;
+	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
+	pcap->header.length = sizeof(*pcap);
+	pcap->highest_capability = 1;
+	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
+		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+
 	if (t->setup_hotplug) {
-		offset = offset + flush_hint_size * 4;
+		offset = offset + flush_hint_size * 4 + sizeof(*pcap);
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-- 
cgit v1.2.3


From bfbaa952d1232c6199cdeb4896da67e02a13326d Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 1 Feb 2018 17:41:57 -0700
Subject: libnvdimm/nfit_test: add firmware download emulation

Adding support in nfit_test for DSM v1.6 firmware update sequence. The test
will simulate the flashing of firmware to the DIMM. A bogus version string
will be returned as the test has no idea how to parse the firmware binary.
Any bogus binary can be used to "update" as the actual binary is not copied
into the kernel.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
[ vishal: also move smart calls into the nd_cmd_call block ]
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c      | 322 +++++++++++++++++++++++++++++++---
 tools/testing/nvdimm/test/nfit_test.h |  66 ++++++-
 2 files changed, 360 insertions(+), 28 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 2b57254342aa..a043fea4d58d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -137,6 +137,14 @@ static u32 handle[] = {
 
 static unsigned long dimm_fail_cmd_flags[NUM_DCR];
 
+struct nfit_test_fw {
+	enum intel_fw_update_state state;
+	u32 context;
+	u64 version;
+	u32 size_received;
+	u64 end_time;
+};
+
 struct nfit_test {
 	struct acpi_nfit_desc acpi_desc;
 	struct platform_device pdev;
@@ -172,6 +180,7 @@ struct nfit_test {
 	struct nd_intel_smart_threshold *smart_threshold;
 	struct badrange badrange;
 	struct work_struct work;
+	struct nfit_test_fw *fw;
 };
 
 static struct workqueue_struct *nfit_wq;
@@ -183,6 +192,226 @@ static struct nfit_test *to_nfit_test(struct device *dev)
 	return container_of(pdev, struct nfit_test, pdev);
 }
 
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+		struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+	nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+	nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+	nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+	nd_cmd->update_cap = 0;
+	nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+	nd_cmd->run_version = 0;
+	nd_cmd->updated_version = fw->version;
+
+	return 0;
+}
+
+static int nd_intel_test_start_update(struct nfit_test *t,
+		struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (fw->state != FW_STATE_NEW) {
+		/* extended status, FW update in progress */
+		nd_cmd->status = 0x10007;
+		return 0;
+	}
+
+	fw->state = FW_STATE_IN_PROGRESS;
+	fw->context++;
+	fw->size_received = 0;
+	nd_cmd->status = 0;
+	nd_cmd->context = fw->context;
+
+	dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
+
+	return 0;
+}
+
+static int nd_intel_test_send_data(struct nfit_test *t,
+		struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+	u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+
+	dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+	dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
+	dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
+			nd_cmd->data[nd_cmd->length-1]);
+
+	if (fw->state != FW_STATE_IN_PROGRESS) {
+		dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+		*status = 0x5;
+		return 0;
+	}
+
+	if (nd_cmd->context != fw->context) {
+		dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+				__func__, nd_cmd->context, fw->context);
+		*status = 0x10007;
+		return 0;
+	}
+
+	/*
+	 * check offset + len > size of fw storage
+	 * check length is > max send length
+	 */
+	if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+			nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+		*status = 0x3;
+		dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+		return 0;
+	}
+
+	fw->size_received += nd_cmd->length;
+	dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+			__func__, nd_cmd->length, fw->size_received);
+	*status = 0;
+	return 0;
+}
+
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+		struct nd_intel_fw_finish_update *nd_cmd,
+		unsigned int buf_len, int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (fw->state == FW_STATE_UPDATED) {
+		/* update already done, need cold boot */
+		nd_cmd->status = 0x20007;
+		return 0;
+	}
+
+	dev_dbg(dev, "%s: context: %#x  ctrl_flags: %#x\n",
+			__func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+	switch (nd_cmd->ctrl_flags) {
+	case 0: /* finish */
+		if (nd_cmd->context != fw->context) {
+			dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+					__func__, nd_cmd->context,
+					fw->context);
+			nd_cmd->status = 0x10007;
+			return 0;
+		}
+		nd_cmd->status = 0;
+		fw->state = FW_STATE_VERIFY;
+		/* set 1 second of time for firmware "update" */
+		fw->end_time = jiffies + HZ;
+		break;
+
+	case 1: /* abort */
+		fw->size_received = 0;
+		/* successfully aborted status */
+		nd_cmd->status = 0x40007;
+		fw->state = FW_STATE_NEW;
+		dev_dbg(dev, "%s: abort successful\n", __func__);
+		break;
+
+	default: /* bad control flag */
+		dev_warn(dev, "%s: unknown control flag: %#x\n",
+				__func__, nd_cmd->ctrl_flags);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+		struct nd_intel_fw_finish_query *nd_cmd,
+		unsigned int buf_len, int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (nd_cmd->context != fw->context) {
+		dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+				__func__, nd_cmd->context, fw->context);
+		nd_cmd->status = 0x10007;
+		return 0;
+	}
+
+	dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+	switch (fw->state) {
+	case FW_STATE_NEW:
+		nd_cmd->updated_fw_rev = 0;
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: new state\n", __func__);
+		break;
+
+	case FW_STATE_IN_PROGRESS:
+		/* sequencing error */
+		nd_cmd->status = 0x40007;
+		nd_cmd->updated_fw_rev = 0;
+		dev_dbg(dev, "%s: sequence error\n", __func__);
+		break;
+
+	case FW_STATE_VERIFY:
+		if (time_is_after_jiffies64(fw->end_time)) {
+			nd_cmd->updated_fw_rev = 0;
+			nd_cmd->status = 0x20007;
+			dev_dbg(dev, "%s: still verifying\n", __func__);
+			break;
+		}
+
+		dev_dbg(dev, "%s: transition out verify\n", __func__);
+		fw->state = FW_STATE_UPDATED;
+		/* we are going to fall through if it's "done" */
+	case FW_STATE_UPDATED:
+		nd_cmd->status = 0;
+		/* bogus test version */
+		fw->version = nd_cmd->updated_fw_rev =
+			INTEL_FW_FAKE_VERSION;
+		dev_dbg(dev, "%s: updated\n", __func__);
+		break;
+
+	default: /* we should never get here */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
 		unsigned int buf_len)
 {
@@ -592,6 +821,23 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
 	return 0;
 }
 
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+	int i;
+
+	/* lookup per-dimm data */
+	for (i = 0; i < ARRAY_SIZE(handle); i++)
+		if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+			break;
+	if (i >= ARRAY_SIZE(handle))
+		return -ENXIO;
+
+	if ((1 << func) & dimm_fail_cmd_flags[i])
+		return -EIO;
+
+	return i;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc)
@@ -620,22 +866,54 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 			func = call_pkg->nd_command;
 			if (call_pkg->nd_family != nfit_mem->family)
 				return -ENOTTY;
+
+			i = get_dimm(nfit_mem, func);
+			if (i < 0)
+				return i;
+
+			switch (func) {
+			case ND_INTEL_FW_GET_INFO:
+				return nd_intel_test_get_fw_info(t, buf,
+						buf_len, i - t->dcr_idx);
+			case ND_INTEL_FW_START_UPDATE:
+				return nd_intel_test_start_update(t, buf,
+						buf_len, i - t->dcr_idx);
+			case ND_INTEL_FW_SEND_DATA:
+				return nd_intel_test_send_data(t, buf,
+						buf_len, i - t->dcr_idx);
+			case ND_INTEL_FW_FINISH_UPDATE:
+				return nd_intel_test_finish_fw(t, buf,
+						buf_len, i - t->dcr_idx);
+			case ND_INTEL_FW_FINISH_QUERY:
+				return nd_intel_test_finish_query(t, buf,
+						buf_len, i - t->dcr_idx);
+			case ND_INTEL_SMART:
+				return nfit_test_cmd_smart(buf, buf_len,
+						&t->smart[i - t->dcr_idx]);
+			case ND_INTEL_SMART_THRESHOLD:
+				return nfit_test_cmd_smart_threshold(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx]);
+			case ND_INTEL_SMART_SET_THRESHOLD:
+				return nfit_test_cmd_smart_set_threshold(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
+			default:
+				return -ENOTTY;
+			}
 		}
 
 		if (!test_bit(cmd, &cmd_mask)
 				|| !test_bit(func, &nfit_mem->dsm_mask))
 			return -ENOTTY;
 
-		/* lookup per-dimm data */
-		for (i = 0; i < ARRAY_SIZE(handle); i++)
-			if (__to_nfit_memdev(nfit_mem)->device_handle ==
-					handle[i])
-				break;
-		if (i >= ARRAY_SIZE(handle))
-			return -ENXIO;
-
-		if ((1 << func) & dimm_fail_cmd_flags[i])
-			return -EIO;
+		i = get_dimm(nfit_mem, func);
+		if (i < 0)
+			return i;
 
 		switch (func) {
 		case ND_CMD_GET_CONFIG_SIZE:
@@ -649,20 +927,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 			rc = nfit_test_cmd_set_config_data(buf, buf_len,
 				t->label[i - t->dcr_idx]);
 			break;
-		case ND_INTEL_SMART:
-			rc = nfit_test_cmd_smart(buf, buf_len,
-					&t->smart[i - t->dcr_idx]);
-			break;
-		case ND_INTEL_SMART_THRESHOLD:
-			rc = nfit_test_cmd_smart_threshold(buf, buf_len,
-					&t->smart_threshold[i - t->dcr_idx]);
-			break;
-		case ND_INTEL_SMART_SET_THRESHOLD:
-			rc = nfit_test_cmd_smart_set_threshold(buf, buf_len,
-					&t->smart_threshold[i - t->dcr_idx],
-					&t->smart[i - t->dcr_idx],
-					&t->pdev.dev, t->dimm_dev[i]);
-			break;
 		default:
 			return -ENOTTY;
 		}
@@ -1728,6 +1992,11 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
 	set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
 	set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
 }
 
 static void nfit_test1_setup(struct nfit_test *t)
@@ -2134,10 +2403,13 @@ static int nfit_test_probe(struct platform_device *pdev)
 		nfit_test->smart_threshold = devm_kcalloc(dev, num,
 				sizeof(struct nd_intel_smart_threshold),
 				GFP_KERNEL);
+		nfit_test->fw = devm_kcalloc(dev, num,
+				sizeof(struct nfit_test_fw), GFP_KERNEL);
 		if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
 				&& nfit_test->label_dma && nfit_test->dcr
 				&& nfit_test->dcr_dma && nfit_test->flush
-				&& nfit_test->flush_dma)
+				&& nfit_test->flush_dma
+				&& nfit_test->fw)
 			/* pass */;
 		else
 			return -ENOMEM;
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index ba230f6f7676..be8fa8ec0615 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -84,9 +84,14 @@ struct nd_cmd_ars_err_inj_stat {
 	} __packed record[0];
 } __packed;
 
-#define ND_INTEL_SMART 1
-#define ND_INTEL_SMART_THRESHOLD 2
-#define ND_INTEL_SMART_SET_THRESHOLD 17
+#define ND_INTEL_SMART			 1
+#define ND_INTEL_SMART_THRESHOLD	 2
+#define ND_INTEL_FW_GET_INFO		12
+#define ND_INTEL_FW_START_UPDATE	13
+#define ND_INTEL_FW_SEND_DATA		14
+#define ND_INTEL_FW_FINISH_UPDATE	15
+#define ND_INTEL_FW_FINISH_QUERY	16
+#define ND_INTEL_SMART_SET_THRESHOLD	17
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -152,6 +157,61 @@ struct nd_intel_smart_set_threshold {
 	__u32 status;
 } __packed;
 
+#define INTEL_FW_STORAGE_SIZE		0x100000
+#define INTEL_FW_MAX_SEND_LEN		0xFFEC
+#define INTEL_FW_QUERY_INTERVAL		250000
+#define INTEL_FW_QUERY_MAX_TIME		3000000
+#define INTEL_FW_FIS_VERSION		0x0105
+#define INTEL_FW_FAKE_VERSION		0xffffffffabcd
+
+enum intel_fw_update_state {
+	FW_STATE_NEW = 0,
+	FW_STATE_IN_PROGRESS,
+	FW_STATE_VERIFY,
+	FW_STATE_UPDATED,
+};
+
+struct nd_intel_fw_info {
+	__u32 status;
+	__u32 storage_size;
+	__u32 max_send_len;
+	__u32 query_interval;
+	__u32 max_query_time;
+	__u8 update_cap;
+	__u8 reserved[3];
+	__u32 fis_version;
+	__u64 run_version;
+	__u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+	__u32 status;
+	__u32 context;
+} __packed;
+
+/* this one has the output first because the variable input data size */
+struct nd_intel_fw_send_data {
+	__u32 context;
+	__u32 offset;
+	__u32 length;
+	__u8 data[0];
+/* this field is not declared due ot variable data from input */
+/*	__u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+	__u8 ctrl_flags;
+	__u8 reserved[3];
+	__u32 context;
+	__u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+	__u32 context;
+	__u32 status;
+	__u64 updated_fw_rev;
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
 
-- 
cgit v1.2.3


From 674d8bdec770d40288574534eab27d82bdf16b0e Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 1 Feb 2018 17:41:58 -0700
Subject: libnvdimm/nfit_test: adding support for unit testing enable LSS
 status

Adding support code to simulate the enabling of LSS status in support of
the Intel DSM v1.6 Function Index 10: Enable Latch System Shutdown Status.
This is only for testing of libndctl support for LSS enable. The actual
functionality requires a reboot and therefore is not simulated. The enable
value is not recorded in nfit_test since there's no DSM to actually query
the current status of the LSS enable.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c      | 34 ++++++++++++++++++++++++++++++++++
 tools/testing/nvdimm/test/nfit_test.h |  6 ++++++
 2 files changed, 40 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a043fea4d58d..42ee1798971d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -821,6 +821,35 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
 	return 0;
 }
 
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+		struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+	struct device *dev = &t->pdev.dev;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	switch (nd_cmd->enable) {
+	case 0:
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+				__func__);
+		break;
+	case 1:
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+				__func__);
+		break;
+	default:
+		dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+		nd_cmd->status = 0x3;
+		break;
+	}
+
+
+	return 0;
+}
+
 static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
 {
 	int i;
@@ -872,6 +901,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 				return i;
 
 			switch (func) {
+			case ND_INTEL_ENABLE_LSS_STATUS:
+				return nd_intel_test_cmd_set_lss_status(t,
+						buf, buf_len);
 			case ND_INTEL_FW_GET_INFO:
 				return nd_intel_test_get_fw_info(t, buf,
 						buf_len, i - t->dcr_idx);
@@ -1997,6 +2029,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
 }
 
 static void nfit_test1_setup(struct nfit_test *t)
@@ -2094,6 +2127,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index be8fa8ec0615..428344519cdf 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -86,6 +86,7 @@ struct nd_cmd_ars_err_inj_stat {
 
 #define ND_INTEL_SMART			 1
 #define ND_INTEL_SMART_THRESHOLD	 2
+#define ND_INTEL_ENABLE_LSS_STATUS	10
 #define ND_INTEL_FW_GET_INFO		12
 #define ND_INTEL_FW_START_UPDATE	13
 #define ND_INTEL_FW_SEND_DATA		14
@@ -212,6 +213,11 @@ struct nd_intel_fw_finish_query {
 	__u64 updated_fw_rev;
 } __packed;
 
+struct nd_intel_lss {
+	__u8 enable;
+	__u32 status;
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
 
-- 
cgit v1.2.3


From 0fb5c8df609eaca3cb7c24e7f91470f8dd5984ec Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 1 Feb 2018 12:28:54 -0800
Subject: tools/testing/nvdimm: force nfit_test to depend on instrumented
 modules

The libnvdimm unit tests will fail when they are run against the
production / in-tree version of libnvdimm.ko or nfit.ko due to
symbols not being mocked per nfit_test's expectation. For example,
nfit_test expects acpi_evaluate_dsm() to be replaced by
__wrap_acpi_evaluate_dsm() to test how acpi_nfit_ctl() responds to
different stimuli.

Create a test-only symbol name that nfit_test links against to cause
module load failures when the wrong module is present.

For example, with this change, attempts to use the wrong module will
report:

    nfit_test: Unknown symbol libnvdimm_test (err 0)

Reported-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/Kbuild            |  4 ++++
 tools/testing/nvdimm/acpi_nfit_test.c  |  8 ++++++++
 tools/testing/nvdimm/device_dax_test.c |  8 ++++++++
 tools/testing/nvdimm/libnvdimm_test.c  |  8 ++++++++
 tools/testing/nvdimm/pmem_test.c       |  8 ++++++++
 tools/testing/nvdimm/test/nfit.c       |  6 ++++++
 tools/testing/nvdimm/watermark.h       | 21 +++++++++++++++++++++
 7 files changed, 63 insertions(+)
 create mode 100644 tools/testing/nvdimm/acpi_nfit_test.c
 create mode 100644 tools/testing/nvdimm/device_dax_test.c
 create mode 100644 tools/testing/nvdimm/libnvdimm_test.c
 create mode 100644 tools/testing/nvdimm/pmem_test.c
 create mode 100644 tools/testing/nvdimm/watermark.h

(limited to 'tools')

diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index db33b28c5ef3..0392153a0009 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 nfit-y := $(ACPI_SRC)/core.o
 nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
 nfit-y += config_check.o
 
 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
 nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
 nd_pmem-y += config_check.o
 
 nd_btt-y := $(NVDIMM_SRC)/btt.o
@@ -57,6 +59,7 @@ dax-y += config_check.o
 
 device_dax-y := $(DAX_SRC)/device.o
 device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
 device_dax-y += config_check.o
 
 dax_pmem-y := $(DAX_SRC)/pmem.o
@@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
 libnvdimm-y += config_check.o
 
 obj-m += test/
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000000..43521512e577
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000000..24b17bf42429
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000000..00ca30b23932
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000000..fd38f92275cf
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 42ee1798971d..450b4cbba6b6 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -27,6 +27,7 @@
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
+#include "../watermark.h"
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -2545,6 +2546,11 @@ static __init int nfit_test_init(void)
 {
 	int rc, i;
 
+	pmem_test();
+	libnvdimm_test();
+	acpi_nfit_test();
+	device_dax_test();
+
 	nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000000..ed0528757bd4
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * dummy routine for nfit_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define nfit_test_watermark(x)				\
+int x##_test(void)					\
+{							\
+	pr_debug("%s for nfit_test\n", KBUILD_MODNAME);	\
+	return 0;					\
+}							\
+EXPORT_SYMBOL(x##_test)
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */
-- 
cgit v1.2.3


From dc2b9f19e3bdaa87a7c3d123b8bba8a42d96d942 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Tue, 30 Jan 2018 21:55:00 +0100
Subject: tools: add netlink.h and if_link.h in tools uapi

The headers are necessary for libbpf compilation on system with older
version of the headers.

Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/if_link.h | 943 +++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/netlink.h | 251 ++++++++++
 tools/lib/bpf/Makefile             |   6 +
 3 files changed, 1200 insertions(+)
 create mode 100644 tools/include/uapi/linux/if_link.h
 create mode 100644 tools/include/uapi/linux/netlink.h

(limited to 'tools')

diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
new file mode 100644
index 000000000000..8616131e2c61
--- /dev/null
+++ b/tools/include/uapi/linux/if_link.h
@@ -0,0 +1,943 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+	__u32	rx_packets;		/* total packets received	*/
+	__u32	tx_packets;		/* total packets transmitted	*/
+	__u32	rx_bytes;		/* total bytes received 	*/
+	__u32	tx_bytes;		/* total bytes transmitted	*/
+	__u32	rx_errors;		/* bad packets received		*/
+	__u32	tx_errors;		/* packet transmit problems	*/
+	__u32	rx_dropped;		/* no space in linux buffers	*/
+	__u32	tx_dropped;		/* no space available in linux	*/
+	__u32	multicast;		/* multicast packets received	*/
+	__u32	collisions;
+
+	/* detailed rx_errors: */
+	__u32	rx_length_errors;
+	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u32	rx_frame_errors;	/* recv'd frame alignment error */
+	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u32	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u32	tx_aborted_errors;
+	__u32	tx_carrier_errors;
+	__u32	tx_fifo_errors;
+	__u32	tx_heartbeat_errors;
+	__u32	tx_window_errors;
+
+	/* for cslip etc */
+	__u32	rx_compressed;
+	__u32	tx_compressed;
+
+	__u32	rx_nohandler;		/* dropped, no handler found	*/
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+	__u64	rx_packets;		/* total packets received	*/
+	__u64	tx_packets;		/* total packets transmitted	*/
+	__u64	rx_bytes;		/* total bytes received 	*/
+	__u64	tx_bytes;		/* total bytes transmitted	*/
+	__u64	rx_errors;		/* bad packets received		*/
+	__u64	tx_errors;		/* packet transmit problems	*/
+	__u64	rx_dropped;		/* no space in linux buffers	*/
+	__u64	tx_dropped;		/* no space available in linux	*/
+	__u64	multicast;		/* multicast packets received	*/
+	__u64	collisions;
+
+	/* detailed rx_errors: */
+	__u64	rx_length_errors;
+	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u64	rx_frame_errors;	/* recv'd frame alignment error */
+	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u64	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u64	tx_aborted_errors;
+	__u64	tx_carrier_errors;
+	__u64	tx_fifo_errors;
+	__u64	tx_heartbeat_errors;
+	__u64	tx_window_errors;
+
+	/* for cslip etc */
+	__u64	rx_compressed;
+	__u64	tx_compressed;
+
+	__u64	rx_nohandler;		/* dropped, no handler found	*/
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+	__u64	mem_start;
+	__u64	mem_end;
+	__u64	base_addr;
+	__u16	irq;
+	__u8	dma;
+	__u8	port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ *   Contains nested attributes for address family specific attributes.
+ *   Each address family may create a attribute with the address family
+ *   number as type and create its own attribute structure in it.
+ *
+ *   Example:
+ *   [IFLA_AF_SPEC] = {
+ *       [AF_INET] = {
+ *           [IFLA_INET_CONF] = ...,
+ *       },
+ *       [AF_INET6] = {
+ *           [IFLA_INET6_FLAGS] = ...,
+ *           [IFLA_INET6_CONF] = ...,
+ *       }
+ *   }
+ */
+
+enum {
+	IFLA_UNSPEC,
+	IFLA_ADDRESS,
+	IFLA_BROADCAST,
+	IFLA_IFNAME,
+	IFLA_MTU,
+	IFLA_LINK,
+	IFLA_QDISC,
+	IFLA_STATS,
+	IFLA_COST,
+#define IFLA_COST IFLA_COST
+	IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+	IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+	IFLA_PROTINFO,		/* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+	IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+	IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+	IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
+	IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+	IFLA_NET_NS_PID,
+	IFLA_IFALIAS,
+	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
+	IFLA_VFINFO_LIST,
+	IFLA_STATS64,
+	IFLA_VF_PORTS,
+	IFLA_PORT_SELF,
+	IFLA_AF_SPEC,
+	IFLA_GROUP,		/* Group the device belongs to */
+	IFLA_NET_NS_FD,
+	IFLA_EXT_MASK,		/* Extended info mask, VFs, etc */
+	IFLA_PROMISCUITY,	/* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+	IFLA_NUM_TX_QUEUES,
+	IFLA_NUM_RX_QUEUES,
+	IFLA_CARRIER,
+	IFLA_PHYS_PORT_ID,
+	IFLA_CARRIER_CHANGES,
+	IFLA_PHYS_SWITCH_ID,
+	IFLA_LINK_NETNSID,
+	IFLA_PHYS_PORT_NAME,
+	IFLA_PROTO_DOWN,
+	IFLA_GSO_MAX_SEGS,
+	IFLA_GSO_MAX_SIZE,
+	IFLA_PAD,
+	IFLA_XDP,
+	IFLA_EVENT,
+	IFLA_NEW_NETNSID,
+	IFLA_IF_NETNSID,
+	IFLA_CARRIER_UP_COUNT,
+	IFLA_CARRIER_DOWN_COUNT,
+	__IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+	IFLA_INET_UNSPEC,
+	IFLA_INET_CONF,
+	__IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+   IFF_* flags.
+
+   The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   more not changeable by user. They describe link media
+   characteristics and set by device driver.
+
+   Comments:
+   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+   - If neither of these three flags are set;
+     the interface is NBMA.
+
+   - IFF_MULTICAST does not mean anything special:
+   multicasts can be used on all not-NBMA links.
+   IFF_MULTICAST means that this media uses special encapsulation
+   for multicast frames. Apparently, all IFF_POINTOPOINT and
+   IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it is equal ifi_index.
+   If it is a "virtual interface" (f.e. tunnel), ifi_link
+   can point to real physical interface (f.e. for bandwidth calculations),
+   or maybe 0, what means, that real media is unknown (usual
+   for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+	IFLA_INET6_UNSPEC,
+	IFLA_INET6_FLAGS,	/* link flags			*/
+	IFLA_INET6_CONF,	/* sysctl parameters		*/
+	IFLA_INET6_STATS,	/* statistics			*/
+	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
+	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
+	IFLA_INET6_TOKEN,	/* device token			*/
+	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+	__IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+	IN6_ADDR_GEN_MODE_EUI64,
+	IN6_ADDR_GEN_MODE_NONE,
+	IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+	IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+	IFLA_BR_UNSPEC,
+	IFLA_BR_FORWARD_DELAY,
+	IFLA_BR_HELLO_TIME,
+	IFLA_BR_MAX_AGE,
+	IFLA_BR_AGEING_TIME,
+	IFLA_BR_STP_STATE,
+	IFLA_BR_PRIORITY,
+	IFLA_BR_VLAN_FILTERING,
+	IFLA_BR_VLAN_PROTOCOL,
+	IFLA_BR_GROUP_FWD_MASK,
+	IFLA_BR_ROOT_ID,
+	IFLA_BR_BRIDGE_ID,
+	IFLA_BR_ROOT_PORT,
+	IFLA_BR_ROOT_PATH_COST,
+	IFLA_BR_TOPOLOGY_CHANGE,
+	IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+	IFLA_BR_HELLO_TIMER,
+	IFLA_BR_TCN_TIMER,
+	IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+	IFLA_BR_GC_TIMER,
+	IFLA_BR_GROUP_ADDR,
+	IFLA_BR_FDB_FLUSH,
+	IFLA_BR_MCAST_ROUTER,
+	IFLA_BR_MCAST_SNOOPING,
+	IFLA_BR_MCAST_QUERY_USE_IFADDR,
+	IFLA_BR_MCAST_QUERIER,
+	IFLA_BR_MCAST_HASH_ELASTICITY,
+	IFLA_BR_MCAST_HASH_MAX,
+	IFLA_BR_MCAST_LAST_MEMBER_CNT,
+	IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+	IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+	IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+	IFLA_BR_MCAST_QUERIER_INTVL,
+	IFLA_BR_MCAST_QUERY_INTVL,
+	IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+	IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+	IFLA_BR_NF_CALL_IPTABLES,
+	IFLA_BR_NF_CALL_IP6TABLES,
+	IFLA_BR_NF_CALL_ARPTABLES,
+	IFLA_BR_VLAN_DEFAULT_PVID,
+	IFLA_BR_PAD,
+	IFLA_BR_VLAN_STATS_ENABLED,
+	IFLA_BR_MCAST_STATS_ENABLED,
+	IFLA_BR_MCAST_IGMP_VERSION,
+	IFLA_BR_MCAST_MLD_VERSION,
+	__IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX	(__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+	__u8	prio[2];
+	__u8	addr[6]; /* ETH_ALEN */
+};
+
+enum {
+	BRIDGE_MODE_UNSPEC,
+	BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+	IFLA_BRPORT_UNSPEC,
+	IFLA_BRPORT_STATE,	/* Spanning tree state     */
+	IFLA_BRPORT_PRIORITY,	/* "             priority  */
+	IFLA_BRPORT_COST,	/* "             cost      */
+	IFLA_BRPORT_MODE,	/* mode (hairpin)          */
+	IFLA_BRPORT_GUARD,	/* bpdu guard              */
+	IFLA_BRPORT_PROTECT,	/* root port protection    */
+	IFLA_BRPORT_FAST_LEAVE,	/* multicast fast leave    */
+	IFLA_BRPORT_LEARNING,	/* mac learning */
+	IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+	IFLA_BRPORT_PROXYARP,	/* proxy ARP */
+	IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+	IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+	IFLA_BRPORT_ROOT_ID,	/* designated root */
+	IFLA_BRPORT_BRIDGE_ID,	/* designated bridge */
+	IFLA_BRPORT_DESIGNATED_PORT,
+	IFLA_BRPORT_DESIGNATED_COST,
+	IFLA_BRPORT_ID,
+	IFLA_BRPORT_NO,
+	IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+	IFLA_BRPORT_CONFIG_PENDING,
+	IFLA_BRPORT_MESSAGE_AGE_TIMER,
+	IFLA_BRPORT_FORWARD_DELAY_TIMER,
+	IFLA_BRPORT_HOLD_TIMER,
+	IFLA_BRPORT_FLUSH,
+	IFLA_BRPORT_MULTICAST_ROUTER,
+	IFLA_BRPORT_PAD,
+	IFLA_BRPORT_MCAST_FLOOD,
+	IFLA_BRPORT_MCAST_TO_UCAST,
+	IFLA_BRPORT_VLAN_TUNNEL,
+	IFLA_BRPORT_BCAST_FLOOD,
+	IFLA_BRPORT_GROUP_FWD_MASK,
+	IFLA_BRPORT_NEIGH_SUPPRESS,
+	__IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+	__u32	max_reasm_len;
+	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
+	__u32	reachable_time;
+	__u32	retrans_time;
+};
+
+enum {
+	IFLA_INFO_UNSPEC,
+	IFLA_INFO_KIND,
+	IFLA_INFO_DATA,
+	IFLA_INFO_XSTATS,
+	IFLA_INFO_SLAVE_KIND,
+	IFLA_INFO_SLAVE_DATA,
+	__IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+	IFLA_VLAN_UNSPEC,
+	IFLA_VLAN_ID,
+	IFLA_VLAN_FLAGS,
+	IFLA_VLAN_EGRESS_QOS,
+	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
+	__IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+	__u32	flags;
+	__u32	mask;
+};
+
+enum {
+	IFLA_VLAN_QOS_UNSPEC,
+	IFLA_VLAN_QOS_MAPPING,
+	__IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+	__u32 from;
+	__u32 to;
+};
+
+/* MACVLAN section */
+enum {
+	IFLA_MACVLAN_UNSPEC,
+	IFLA_MACVLAN_MODE,
+	IFLA_MACVLAN_FLAGS,
+	IFLA_MACVLAN_MACADDR_MODE,
+	IFLA_MACVLAN_MACADDR,
+	IFLA_MACVLAN_MACADDR_DATA,
+	IFLA_MACVLAN_MACADDR_COUNT,
+	__IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+	MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
+	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
+	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+	MACVLAN_MODE_SOURCE  = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+	MACVLAN_MACADDR_ADD,
+	MACVLAN_MACADDR_DEL,
+	MACVLAN_MACADDR_FLUSH,
+	MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC	1
+
+/* VRF section */
+enum {
+	IFLA_VRF_UNSPEC,
+	IFLA_VRF_TABLE,
+	__IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+	IFLA_VRF_PORT_UNSPEC,
+	IFLA_VRF_PORT_TABLE,
+	__IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+	IFLA_MACSEC_UNSPEC,
+	IFLA_MACSEC_SCI,
+	IFLA_MACSEC_PORT,
+	IFLA_MACSEC_ICV_LEN,
+	IFLA_MACSEC_CIPHER_SUITE,
+	IFLA_MACSEC_WINDOW,
+	IFLA_MACSEC_ENCODING_SA,
+	IFLA_MACSEC_ENCRYPT,
+	IFLA_MACSEC_PROTECT,
+	IFLA_MACSEC_INC_SCI,
+	IFLA_MACSEC_ES,
+	IFLA_MACSEC_SCB,
+	IFLA_MACSEC_REPLAY_PROTECT,
+	IFLA_MACSEC_VALIDATION,
+	IFLA_MACSEC_PAD,
+	__IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+enum macsec_validation_type {
+	MACSEC_VALIDATE_DISABLED = 0,
+	MACSEC_VALIDATE_CHECK = 1,
+	MACSEC_VALIDATE_STRICT = 2,
+	__MACSEC_VALIDATE_END,
+	MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+	IFLA_IPVLAN_UNSPEC,
+	IFLA_IPVLAN_MODE,
+	IFLA_IPVLAN_FLAGS,
+	__IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+	IPVLAN_MODE_L2 = 0,
+	IPVLAN_MODE_L3,
+	IPVLAN_MODE_L3S,
+	IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE	0x01
+#define IPVLAN_F_VEPA		0x02
+
+/* VXLAN section */
+enum {
+	IFLA_VXLAN_UNSPEC,
+	IFLA_VXLAN_ID,
+	IFLA_VXLAN_GROUP,	/* group or remote address */
+	IFLA_VXLAN_LINK,
+	IFLA_VXLAN_LOCAL,
+	IFLA_VXLAN_TTL,
+	IFLA_VXLAN_TOS,
+	IFLA_VXLAN_LEARNING,
+	IFLA_VXLAN_AGEING,
+	IFLA_VXLAN_LIMIT,
+	IFLA_VXLAN_PORT_RANGE,	/* source port */
+	IFLA_VXLAN_PROXY,
+	IFLA_VXLAN_RSC,
+	IFLA_VXLAN_L2MISS,
+	IFLA_VXLAN_L3MISS,
+	IFLA_VXLAN_PORT,	/* destination port */
+	IFLA_VXLAN_GROUP6,
+	IFLA_VXLAN_LOCAL6,
+	IFLA_VXLAN_UDP_CSUM,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+	IFLA_VXLAN_REMCSUM_TX,
+	IFLA_VXLAN_REMCSUM_RX,
+	IFLA_VXLAN_GBP,
+	IFLA_VXLAN_REMCSUM_NOPARTIAL,
+	IFLA_VXLAN_COLLECT_METADATA,
+	IFLA_VXLAN_LABEL,
+	IFLA_VXLAN_GPE,
+	__IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+	__be16	low;
+	__be16	high;
+};
+
+/* GENEVE section */
+enum {
+	IFLA_GENEVE_UNSPEC,
+	IFLA_GENEVE_ID,
+	IFLA_GENEVE_REMOTE,
+	IFLA_GENEVE_TTL,
+	IFLA_GENEVE_TOS,
+	IFLA_GENEVE_PORT,	/* destination port */
+	IFLA_GENEVE_COLLECT_METADATA,
+	IFLA_GENEVE_REMOTE6,
+	IFLA_GENEVE_UDP_CSUM,
+	IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+	IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+	IFLA_GENEVE_LABEL,
+	__IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
+
+/* PPP section */
+enum {
+	IFLA_PPP_UNSPEC,
+	IFLA_PPP_DEV_FD,
+	__IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+	GTP_ROLE_GGSN = 0,
+	GTP_ROLE_SGSN,
+};
+
+enum {
+	IFLA_GTP_UNSPEC,
+	IFLA_GTP_FD0,
+	IFLA_GTP_FD1,
+	IFLA_GTP_PDP_HASHSIZE,
+	IFLA_GTP_ROLE,
+	__IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+	IFLA_BOND_UNSPEC,
+	IFLA_BOND_MODE,
+	IFLA_BOND_ACTIVE_SLAVE,
+	IFLA_BOND_MIIMON,
+	IFLA_BOND_UPDELAY,
+	IFLA_BOND_DOWNDELAY,
+	IFLA_BOND_USE_CARRIER,
+	IFLA_BOND_ARP_INTERVAL,
+	IFLA_BOND_ARP_IP_TARGET,
+	IFLA_BOND_ARP_VALIDATE,
+	IFLA_BOND_ARP_ALL_TARGETS,
+	IFLA_BOND_PRIMARY,
+	IFLA_BOND_PRIMARY_RESELECT,
+	IFLA_BOND_FAIL_OVER_MAC,
+	IFLA_BOND_XMIT_HASH_POLICY,
+	IFLA_BOND_RESEND_IGMP,
+	IFLA_BOND_NUM_PEER_NOTIF,
+	IFLA_BOND_ALL_SLAVES_ACTIVE,
+	IFLA_BOND_MIN_LINKS,
+	IFLA_BOND_LP_INTERVAL,
+	IFLA_BOND_PACKETS_PER_SLAVE,
+	IFLA_BOND_AD_LACP_RATE,
+	IFLA_BOND_AD_SELECT,
+	IFLA_BOND_AD_INFO,
+	IFLA_BOND_AD_ACTOR_SYS_PRIO,
+	IFLA_BOND_AD_USER_PORT_KEY,
+	IFLA_BOND_AD_ACTOR_SYSTEM,
+	IFLA_BOND_TLB_DYNAMIC_LB,
+	__IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX	(__IFLA_BOND_MAX - 1)
+
+enum {
+	IFLA_BOND_AD_INFO_UNSPEC,
+	IFLA_BOND_AD_INFO_AGGREGATOR,
+	IFLA_BOND_AD_INFO_NUM_PORTS,
+	IFLA_BOND_AD_INFO_ACTOR_KEY,
+	IFLA_BOND_AD_INFO_PARTNER_KEY,
+	IFLA_BOND_AD_INFO_PARTNER_MAC,
+	__IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX	(__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+	IFLA_BOND_SLAVE_UNSPEC,
+	IFLA_BOND_SLAVE_STATE,
+	IFLA_BOND_SLAVE_MII_STATUS,
+	IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+	IFLA_BOND_SLAVE_PERM_HWADDR,
+	IFLA_BOND_SLAVE_QUEUE_ID,
+	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+	__IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX	(__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+	IFLA_VF_INFO_UNSPEC,
+	IFLA_VF_INFO,
+	__IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+	IFLA_VF_UNSPEC,
+	IFLA_VF_MAC,		/* Hardware queue specific attributes */
+	IFLA_VF_VLAN,		/* VLAN ID and QoS */
+	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
+	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
+	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
+	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
+	IFLA_VF_RSS_QUERY_EN,	/* RSS Redirection Table and Hash Key query
+				 * on/off switch
+				 */
+	IFLA_VF_STATS,		/* network device statistics */
+	IFLA_VF_TRUST,		/* Trust VF */
+	IFLA_VF_IB_NODE_GUID,	/* VF Infiniband node GUID */
+	IFLA_VF_IB_PORT_GUID,	/* VF Infiniband port GUID */
+	IFLA_VF_VLAN_LIST,	/* nested list of vlans, option for QinQ */
+	__IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+	__u32 vf;
+	__u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_vlan {
+	__u32 vf;
+	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+	__u32 qos;
+};
+
+enum {
+	IFLA_VF_VLAN_INFO_UNSPEC,
+	IFLA_VF_VLAN_INFO,	/* VLAN ID, QoS and VLAN protocol */
+	__IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+	__u32 vf;
+	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+	__u32 qos;
+	__be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+	__u32 vf;
+	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+	__u32 vf;
+	__u32 min_tx_rate; /* Min Bandwidth in Mbps */
+	__u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+	__u32 vf;
+	__u32 setting;
+};
+
+struct ifla_vf_guid {
+	__u32 vf;
+	__u64 guid;
+};
+
+enum {
+	IFLA_VF_LINK_STATE_AUTO,	/* link state of the uplink */
+	IFLA_VF_LINK_STATE_ENABLE,	/* link always up */
+	IFLA_VF_LINK_STATE_DISABLE,	/* link always down */
+	__IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+	__u32 vf;
+	__u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+	__u32 vf;
+	__u32 setting;
+};
+
+enum {
+	IFLA_VF_STATS_RX_PACKETS,
+	IFLA_VF_STATS_TX_PACKETS,
+	IFLA_VF_STATS_RX_BYTES,
+	IFLA_VF_STATS_TX_BYTES,
+	IFLA_VF_STATS_BROADCAST,
+	IFLA_VF_STATS_MULTICAST,
+	IFLA_VF_STATS_PAD,
+	IFLA_VF_STATS_RX_DROPPED,
+	IFLA_VF_STATS_TX_DROPPED,
+	__IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+	__u32 vf;
+	__u32 setting;
+};
+
+/* VF ports management section
+ *
+ *	Nested layout of set/get msg is:
+ *
+ *		[IFLA_NUM_VF]
+ *		[IFLA_VF_PORTS]
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			...
+ *		[IFLA_PORT_SELF]
+ *			[IFLA_PORT_*], ...
+ */
+
+enum {
+	IFLA_VF_PORT_UNSPEC,
+	IFLA_VF_PORT,			/* nest */
+	__IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+	IFLA_PORT_UNSPEC,
+	IFLA_PORT_VF,			/* __u32 */
+	IFLA_PORT_PROFILE,		/* string */
+	IFLA_PORT_VSI_TYPE,		/* 802.1Qbg (pre-)standard VDP */
+	IFLA_PORT_INSTANCE_UUID,	/* binary UUID */
+	IFLA_PORT_HOST_UUID,		/* binary UUID */
+	IFLA_PORT_REQUEST,		/* __u8 */
+	IFLA_PORT_RESPONSE,		/* __u16, output only */
+	__IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX	40
+#define PORT_UUID_MAX		16
+#define PORT_SELF_VF		-1
+
+enum {
+	PORT_REQUEST_PREASSOCIATE = 0,
+	PORT_REQUEST_PREASSOCIATE_RR,
+	PORT_REQUEST_ASSOCIATE,
+	PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+	PORT_VDP_RESPONSE_SUCCESS = 0,
+	PORT_VDP_RESPONSE_INVALID_FORMAT,
+	PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_VDP_RESPONSE_UNUSED_VTID,
+	PORT_VDP_RESPONSE_VTID_VIOLATION,
+	PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+	PORT_VDP_RESPONSE_OUT_OF_SYNC,
+	/* 0x08-0xFF reserved for future VDP use */
+	PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+	PORT_PROFILE_RESPONSE_INPROGRESS,
+	PORT_PROFILE_RESPONSE_INVALID,
+	PORT_PROFILE_RESPONSE_BADSTATE,
+	PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+	__u8 vsi_mgr_id;
+	__u8 vsi_type_id[3];
+	__u8 vsi_type_version;
+	__u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+	IFLA_IPOIB_UNSPEC,
+	IFLA_IPOIB_PKEY,
+	IFLA_IPOIB_MODE,
+	IFLA_IPOIB_UMCAST,
+	__IFLA_IPOIB_MAX
+};
+
+enum {
+	IPOIB_MODE_DATAGRAM  = 0, /* using unreliable datagram QPs */
+	IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR section */
+
+enum {
+	IFLA_HSR_UNSPEC,
+	IFLA_HSR_SLAVE1,
+	IFLA_HSR_SLAVE2,
+	IFLA_HSR_MULTICAST_SPEC,	/* Last byte of supervision addr */
+	IFLA_HSR_SUPERVISION_ADDR,	/* Supervision frame multicast addr */
+	IFLA_HSR_SEQ_NR,
+	IFLA_HSR_VERSION,		/* HSR version */
+	__IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+	__u8  family;
+	__u8  pad1;
+	__u16 pad2;
+	__u32 ifindex;
+	__u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+	IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+	IFLA_STATS_LINK_64,
+	IFLA_STATS_LINK_XSTATS,
+	IFLA_STATS_LINK_XSTATS_SLAVE,
+	IFLA_STATS_LINK_OFFLOAD_XSTATS,
+	IFLA_STATS_AF_SPEC,
+	__IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR - 1))
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ *    -> [rtnl link type specific attributes]
+ */
+enum {
+	LINK_XSTATS_TYPE_UNSPEC,
+	LINK_XSTATS_TYPE_BRIDGE,
+	__LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+	IFLA_OFFLOAD_XSTATS_UNSPEC,
+	IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+	__IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
+#define XDP_FLAGS_SKB_MODE		(1U << 1)
+#define XDP_FLAGS_DRV_MODE		(1U << 2)
+#define XDP_FLAGS_HW_MODE		(1U << 3)
+#define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
+					 XDP_FLAGS_DRV_MODE | \
+					 XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
+					 XDP_FLAGS_MODES)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+	XDP_ATTACHED_NONE = 0,
+	XDP_ATTACHED_DRV,
+	XDP_ATTACHED_SKB,
+	XDP_ATTACHED_HW,
+};
+
+enum {
+	IFLA_XDP_UNSPEC,
+	IFLA_XDP_FD,
+	IFLA_XDP_ATTACHED,
+	IFLA_XDP_FLAGS,
+	IFLA_XDP_PROG_ID,
+	__IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+	IFLA_EVENT_NONE,
+	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
+	IFLA_EVENT_FEATURES,		/* change in offload features */
+	IFLA_EVENT_BONDING_FAILOVER,	/* change in active slave */
+	IFLA_EVENT_NOTIFY_PEERS,	/* re-sent grat. arp/ndisc */
+	IFLA_EVENT_IGMP_RESEND,		/* re-sent IGMP JOIN */
+	IFLA_EVENT_BONDING_OPTIONS,	/* change in bonding options */
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
new file mode 100644
index 000000000000..776bc92e9118
--- /dev/null
+++ b/tools/include/uapi/linux/netlink.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE		0	/* Routing/device hook				*/
+#define NETLINK_UNUSED		1	/* Unused number				*/
+#define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols 	*/
+#define NETLINK_FIREWALL	3	/* Unused number, formerly ip_queue		*/
+#define NETLINK_SOCK_DIAG	4	/* socket monitoring				*/
+#define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
+#define NETLINK_XFRM		6	/* ipsec */
+#define NETLINK_SELINUX		7	/* SELinux event notifications */
+#define NETLINK_ISCSI		8	/* Open-iSCSI */
+#define NETLINK_AUDIT		9	/* auditing */
+#define NETLINK_FIB_LOOKUP	10	
+#define NETLINK_CONNECTOR	11
+#define NETLINK_NETFILTER	12	/* netfilter subsystem */
+#define NETLINK_IP6_FW		13
+#define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
+#define NETLINK_GENERIC		16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
+#define NETLINK_ECRYPTFS	19
+#define NETLINK_RDMA		20
+#define NETLINK_CRYPTO		21	/* Crypto layer */
+#define NETLINK_SMC		22	/* SMC monitoring */
+
+#define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG
+
+#define MAX_LINKS 32		
+
+struct sockaddr_nl {
+	__kernel_sa_family_t	nl_family;	/* AF_NETLINK	*/
+	unsigned short	nl_pad;		/* zero		*/
+	__u32		nl_pid;		/* port ID	*/
+       	__u32		nl_groups;	/* multicast groups mask */
+};
+
+struct nlmsghdr {
+	__u32		nlmsg_len;	/* Length of message including header */
+	__u16		nlmsg_type;	/* Message content */
+	__u16		nlmsg_flags;	/* Additional flags */
+	__u32		nlmsg_seq;	/* Sequence number */
+	__u32		nlmsg_pid;	/* Sending process port ID */
+};
+
+/* Flags values */
+
+#define NLM_F_REQUEST		0x01	/* It is request message. 	*/
+#define NLM_F_MULTI		0x02	/* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK		0x04	/* Reply with ack, with zero or error code */
+#define NLM_F_ECHO		0x08	/* Echo this request 		*/
+#define NLM_F_DUMP_INTR		0x10	/* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED	0x20	/* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT	0x100	/* specify tree	root	*/
+#define NLM_F_MATCH	0x200	/* return all matching	*/
+#define NLM_F_ATOMIC	0x400	/* atomic GET		*/
+#define NLM_F_DUMP	(NLM_F_ROOT|NLM_F_MATCH)
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE	0x100	/* Override existing		*/
+#define NLM_F_EXCL	0x200	/* Do not touch, if it exists	*/
+#define NLM_F_CREATE	0x400	/* Create, if it does not exist	*/
+#define NLM_F_APPEND	0x800	/* Add to end of list		*/
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC	0x100	/* Do not delete recursively	*/
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED	0x100	/* request was capped */
+#define NLM_F_ACK_TLVS	0x200	/* extended ACK TVLs were included */
+
+/*
+   4.4BSD ADD		NLM_F_CREATE|NLM_F_EXCL
+   4.4BSD CHANGE	NLM_F_REPLACE
+
+   True CHANGE		NLM_F_CREATE|NLM_F_REPLACE
+   Append		NLM_F_CREATE
+   Check		NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO	4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN	 ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh)  ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len)	 ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+				  (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+			   (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+			   (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP		0x1	/* Nothing.		*/
+#define NLMSG_ERROR		0x2	/* Error		*/
+#define NLMSG_DONE		0x3	/* End of a dump	*/
+#define NLMSG_OVERRUN		0x4	/* Data lost		*/
+
+#define NLMSG_MIN_TYPE		0x10	/* < 0x10: reserved control messages */
+
+struct nlmsgerr {
+	int		error;
+	struct nlmsghdr msg;
+	/*
+	 * followed by the message contents unless NETLINK_CAP_ACK was set
+	 * or the ACK indicates success (error == 0)
+	 * message length is aligned with NLMSG_ALIGN()
+	 */
+	/*
+	 * followed by TLVs defined in enum nlmsgerr_attrs
+	 * if NETLINK_EXT_ACK was set
+	 */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ *	 message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ *	be used - in the success case - to identify a created
+ *	object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+	NLMSGERR_ATTR_UNUSED,
+	NLMSGERR_ATTR_MSG,
+	NLMSGERR_ATTR_OFFS,
+	NLMSGERR_ATTR_COOKIE,
+
+	__NLMSGERR_ATTR_MAX,
+	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#define NETLINK_ADD_MEMBERSHIP		1
+#define NETLINK_DROP_MEMBERSHIP		2
+#define NETLINK_PKTINFO			3
+#define NETLINK_BROADCAST_ERROR		4
+#define NETLINK_NO_ENOBUFS		5
+#ifndef __KERNEL__
+#define NETLINK_RX_RING			6
+#define NETLINK_TX_RING			7
+#endif
+#define NETLINK_LISTEN_ALL_NSID		8
+#define NETLINK_LIST_MEMBERSHIPS	9
+#define NETLINK_CAP_ACK			10
+#define NETLINK_EXT_ACK			11
+
+struct nl_pktinfo {
+	__u32	group;
+};
+
+struct nl_mmap_req {
+	unsigned int	nm_block_size;
+	unsigned int	nm_block_nr;
+	unsigned int	nm_frame_size;
+	unsigned int	nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+	unsigned int	nm_status;
+	unsigned int	nm_len;
+	__u32		nm_group;
+	/* credentials */
+	__u32		nm_pid;
+	__u32		nm_uid;
+	__u32		nm_gid;
+};
+
+#ifndef __KERNEL__
+enum nl_mmap_status {
+	NL_MMAP_STATUS_UNUSED,
+	NL_MMAP_STATUS_RESERVED,
+	NL_MMAP_STATUS_VALID,
+	NL_MMAP_STATUS_COPY,
+	NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT		NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz)		__ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN			NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
+
+#define NET_MAJOR 36		/* Major 36 is reserved for networking 						*/
+
+enum {
+	NETLINK_UNCONNECTED = 0,
+	NETLINK_CONNECTED,
+};
+
+/*
+ *  <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * |        Header       | Pad |     Payload       | Pad |
+ * |   (struct nlattr)   | ing |                   | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ *  <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+	__u16           nla_len;
+	__u16           nla_type;
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type                |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flag are mutually exclusive.
+ */
+#define NLA_F_NESTED		(1 << 15)
+#define NLA_F_NET_BYTEORDER	(1 << 14)
+#define NLA_TYPE_MASK		~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO		4
+#define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+	__u32 value;
+	__u32 selector;
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 83714ca1f22b..e6d5f8d1477f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -160,6 +160,12 @@ $(BPF_IN): force elfdep bpfdep
 	@(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \
 	(diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \
 	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \
+	(diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true
+	@(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
+	(diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
+	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
 	$(Q)$(MAKE) $(build)=libbpf
 
 $(OUTPUT)libbpf.so: $(BPF_IN)
-- 
cgit v1.2.3


From 949abbe88436c000cc63fce2bdfeb48b7d06a7df Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Tue, 30 Jan 2018 21:55:01 +0100
Subject: libbpf: add function to setup XDP

Most of the code is taken from set_link_xdp_fd() in bpf_load.c and
slightly modified to be library compliant.

Signed-off-by: Eric Leblond <eric@regit.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c    | 122 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.c |   2 +
 tools/lib/bpf/libbpf.h |   4 ++
 3 files changed, 128 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5128677e4117..bf2772566240 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -25,6 +25,12 @@
 #include <asm/unistd.h>
 #include <linux/bpf.h>
 #include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+#include <sys/socket.h>
+#include <errno.h>
 
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
@@ -46,7 +52,9 @@
 # endif
 #endif
 
+#ifndef min
 #define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
 
 static inline __u64 ptr_to_u64(const void *ptr)
 {
@@ -413,3 +421,117 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 
 	return err;
 }
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	struct sockaddr_nl sa;
+	int sock, seq = 0, len, ret = -1;
+	char buf[4096];
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+	socklen_t addrlen;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		return -errno;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	addrlen = sizeof(sa);
+	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	if (addrlen != sizeof(sa)) {
+		ret = -LIBBPF_ERRNO__INTERNAL;
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = IFLA_XDP_FD;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_FLAGS;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != sa.nl_pid) {
+			ret = -LIBBPF_ERRNO__WRNGPID;
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			ret = -LIBBPF_ERRNO__INVSEQ;
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			ret = err->error;
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		default:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 30c776375118..c60122d3ea85 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -106,6 +106,8 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
 	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
 	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
 	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
+	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
+	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
 };
 
 int libbpf_strerror(int err, char *buf, size_t size)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6e20003109e0..e42f96900318 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -42,6 +42,8 @@ enum libbpf_errno {
 	LIBBPF_ERRNO__PROG2BIG,	/* Program too big */
 	LIBBPF_ERRNO__KVER,	/* Incorrect kernel version */
 	LIBBPF_ERRNO__PROGTYPE,	/* Kernel doesn't support this program type */
+	LIBBPF_ERRNO__WRNGPID,	/* Wrong pid in netlink message */
+	LIBBPF_ERRNO__INVSEQ,	/* Invalid netlink sequence */
 	__LIBBPF_ERRNO__END,
 };
 
@@ -246,4 +248,6 @@ long libbpf_get_error(const void *ptr);
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		  struct bpf_object **pobj, int *prog_fd);
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 #endif
-- 
cgit v1.2.3


From bbf48c18ee0cd18b53712aa09aefa29b64b3976e Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Tue, 30 Jan 2018 21:55:02 +0100
Subject: libbpf: add error reporting in XDP

Parse netlink ext attribute to get the error message returned by
the card. Code is partially take from libnl.

We add netlink.h to the uapi include of tools. And we need to
avoid include of userspace netlink header to have a successful
build of sample so nlattr.h has a define to avoid
the inclusion. Using a direct define could have been an issue
as NLMSGERR_ATTR_MAX can change in the future.

We also define SOL_NETLINK if not defined to avoid to have to
copy socket.h for a fixed value.

Signed-off-by: Eric Leblond <eric@regit.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Build    |   2 +-
 tools/lib/bpf/bpf.c    |  11 +++
 tools/lib/bpf/nlattr.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/nlattr.h |  72 +++++++++++++++++++
 4 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/nlattr.c
 create mode 100644 tools/lib/bpf/nlattr.h

(limited to 'tools')

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index d8749756352d..64c679d67109 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o
+libbpf-y := libbpf.o bpf.o nlattr.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index bf2772566240..9c88f6e4156d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -32,6 +32,10 @@
 #include <sys/socket.h>
 #include <errno.h>
 
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
  * required to be defined explicitly.
@@ -436,6 +440,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 	struct nlmsghdr *nh;
 	struct nlmsgerr *err;
 	socklen_t addrlen;
+	int one = 1;
 
 	memset(&sa, 0, sizeof(sa));
 	sa.nl_family = AF_NETLINK;
@@ -445,6 +450,11 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 		return -errno;
 	}
 
+	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+		       &one, sizeof(one)) < 0) {
+		fprintf(stderr, "Netlink error reporting not supported\n");
+	}
+
 	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
 		ret = -errno;
 		goto cleanup;
@@ -521,6 +531,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 			if (!err->error)
 				continue;
 			ret = err->error;
+			nla_dump_errormsg(nh);
 			goto cleanup;
 		case NLMSG_DONE:
 			break;
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
new file mode 100644
index 000000000000..4719434278b2
--- /dev/null
+++ b/tools/lib/bpf/nlattr.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * NETLINK      Netlink attributes
+ *
+ *	This library is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU Lesser General Public
+ *	License as published by the Free Software Foundation version 2.1
+ *	of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <string.h>
+#include <stdio.h>
+
+static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
+	[NLA_U8]	= sizeof(uint8_t),
+	[NLA_U16]	= sizeof(uint16_t),
+	[NLA_U32]	= sizeof(uint32_t),
+	[NLA_U64]	= sizeof(uint64_t),
+	[NLA_STRING]	= 1,
+	[NLA_FLAG]	= 0,
+};
+
+static int nla_len(const struct nlattr *nla)
+{
+	return nla->nla_len - NLA_HDRLEN;
+}
+
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+	int totlen = NLA_ALIGN(nla->nla_len);
+
+	*remaining -= totlen;
+	return (struct nlattr *) ((char *) nla + totlen);
+}
+
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+	return remaining >= sizeof(*nla) &&
+	       nla->nla_len >= sizeof(*nla) &&
+	       nla->nla_len <= remaining;
+}
+
+static void *nla_data(const struct nlattr *nla)
+{
+	return (char *) nla + NLA_HDRLEN;
+}
+
+static int nla_type(const struct nlattr *nla)
+{
+	return nla->nla_type & NLA_TYPE_MASK;
+}
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+			struct nla_policy *policy)
+{
+	struct nla_policy *pt;
+	unsigned int minlen = 0;
+	int type = nla_type(nla);
+
+	if (type < 0 || type > maxtype)
+		return 0;
+
+	pt = &policy[type];
+
+	if (pt->type > NLA_TYPE_MAX)
+		return 0;
+
+	if (pt->minlen)
+		minlen = pt->minlen;
+	else if (pt->type != NLA_UNSPEC)
+		minlen = nla_attr_minlen[pt->type];
+
+	if (nla_len(nla) < minlen)
+		return -1;
+
+	if (pt->maxlen && nla_len(nla) > pt->maxlen)
+		return -1;
+
+	if (pt->type == NLA_STRING) {
+		char *data = nla_data(nla);
+		if (data[nla_len(nla) - 1] != '\0')
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+	return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb		Index array to be filled (maxtype+1 elements).
+ * @arg maxtype		Maximum attribute type expected and accepted.
+ * @arg head		Head of attribute stream.
+ * @arg len		Length of attribute stream.
+ * @arg policy		Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attribute with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+		     struct nla_policy *policy)
+{
+	struct nlattr *nla;
+	int rem, err;
+
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+	nla_for_each_attr(nla, head, len, rem) {
+		int type = nla_type(nla);
+
+		if (type > maxtype)
+			continue;
+
+		if (policy) {
+			err = validate_nla(nla, maxtype, policy);
+			if (err < 0)
+				goto errout;
+		}
+
+		if (tb[type])
+			fprintf(stderr, "Attribute of type %#x found multiple times in message, "
+				  "previous attribute is being ignored.\n", type);
+
+		tb[type] = nla;
+	}
+
+	err = 0;
+errout:
+	return err;
+}
+
+/* dump netlink extended ack error message */
+int nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+	struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+		[NLMSGERR_ATTR_MSG]	= { .type = NLA_STRING },
+		[NLMSGERR_ATTR_OFFS]	= { .type = NLA_U32 },
+	};
+	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+	struct nlmsgerr *err;
+	char *errmsg = NULL;
+	int hlen, alen;
+
+	/* no TLVs, nothing to do here */
+	if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return 0;
+
+	err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+	hlen = sizeof(*err);
+
+	/* if NLM_F_CAPPED is set then the inner err msg was capped */
+	if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+		hlen += nlmsg_len(&err->msg);
+
+	attr = (struct nlattr *) ((void *) err + hlen);
+	alen = nlh->nlmsg_len - hlen;
+
+	if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) {
+		fprintf(stderr,
+			"Failed to parse extended error attributes\n");
+		return 0;
+	}
+
+	if (tb[NLMSGERR_ATTR_MSG])
+		errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+	fprintf(stderr, "Kernel error message: %s\n", errmsg);
+
+	return 0;
+}
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
new file mode 100644
index 000000000000..931a71f68f93
--- /dev/null
+++ b/tools/lib/bpf/nlattr.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * NETLINK      Netlink attributes
+ *
+ *	This library is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU Lesser General Public
+ *	License as published by the Free Software Foundation version 2.1
+ *	of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __NLATTR_H
+#define __NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+	NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
+	NLA_U8,		/**< 8 bit integer */
+	NLA_U16,	/**< 16 bit integer */
+	NLA_U32,	/**< 32 bit integer */
+	NLA_U64,	/**< 64 bit integer */
+	NLA_STRING,	/**< NUL terminated character string */
+	NLA_FLAG,	/**< Flag */
+	NLA_MSECS,	/**< Micro seconds (64bit) */
+	NLA_NESTED,	/**< Nested attributes */
+	__NLA_TYPE_MAX,
+};
+
+#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct nla_policy {
+	/** Type of attribute or NLA_UNSPEC */
+	uint16_t	type;
+
+	/** Minimal length of payload required */
+	uint16_t	minlen;
+
+	/** Maximal length of payload allowed */
+	uint16_t	maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos	loop counter, set to current attribute
+ * @arg head	head of attribute stream
+ * @arg len	length of attribute stream
+ * @arg rem	initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_attr(pos, head, len, rem) \
+	for (pos = head, rem = len; \
+	     nla_ok(pos, rem); \
+	     pos = nla_next(pos, &(rem)))
+
+int nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __NLATTR_H */
-- 
cgit v1.2.3


From 6061a3d6720600c976b877c3ac1402b3ef0a8a55 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Tue, 30 Jan 2018 21:55:03 +0100
Subject: libbpf: add missing SPDX-License-Identifier

Signed-off-by: Eric Leblond <eric@regit.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c    | 2 ++
 tools/lib/bpf/bpf.h    | 2 ++
 tools/lib/bpf/libbpf.c | 2 ++
 tools/lib/bpf/libbpf.h | 2 ++
 4 files changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9c88f6e4156d..592a58a2b681 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: LGPL-2.1
+
 /*
  * common eBPF ELF operations.
  *
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9f44c196931e..8d18fb73d7fb 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
 /*
  * common eBPF ELF operations.
  *
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c60122d3ea85..71ddc481f349 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: LGPL-2.1
+
 /*
  * Common eBPF ELF object loading operations.
  *
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e42f96900318..f85906533cdd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
 /*
  * Common eBPF ELF object loading operations.
  *
-- 
cgit v1.2.3


From 667ca1ec7c9eb7ac3b80590b6597151b4c2a750b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 29 Jan 2018 15:20:10 -0500
Subject: membarrier/selftest: Test private expedited command

Test the new MEMBARRIER_CMD_PRIVATE_EXPEDITED and
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED commands.

Add checks expecting specific error values on system calls expected to
fail.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alice Ferrazzi <alice.ferrazzi@gmail.com>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Elder <paul.elder@pitt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-2-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 .../testing/selftests/membarrier/membarrier_test.c | 111 ++++++++++++++++++---
 1 file changed, 95 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index 9e674d9514d1..e6ee73d01fa1 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -16,49 +16,119 @@ static int sys_membarrier(int cmd, int flags)
 static int test_membarrier_cmd_fail(void)
 {
 	int cmd = -1, flags = 0;
+	const char *test_name = "sys membarrier invalid command";
 
 	if (sys_membarrier(cmd, flags) != -1) {
 		ksft_exit_fail_msg(
-			"sys membarrier invalid command test: command = %d, flags = %d. Should fail, but passed\n",
-			cmd, flags);
+			"%s test: command = %d, flags = %d. Should fail, but passed\n",
+			test_name, cmd, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier invalid command test: command = %d, flags = %d. Failed as expected\n",
-		cmd, flags);
+		"%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+		test_name, cmd, flags, errno);
 	return 0;
 }
 
 static int test_membarrier_flags_fail(void)
 {
 	int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
 
 	if (sys_membarrier(cmd, flags) != -1) {
 		ksft_exit_fail_msg(
-			"sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Should fail, but passed\n",
-			flags);
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Failed as expected\n",
-		flags);
+		"%s test: flags = %d, errno = %d. Failed as expected\n",
+		test_name, flags, errno);
 	return 0;
 }
 
-static int test_membarrier_success(void)
+static int test_membarrier_shared_success(void)
 {
 	int cmd = MEMBARRIER_CMD_SHARED, flags = 0;
-	const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED\n";
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n", test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
 
 	if (sys_membarrier(cmd, flags) != 0) {
 		ksft_exit_fail_msg(
-			"sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n",
-			flags);
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n",
-		flags);
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
 	return 0;
 }
 
@@ -72,7 +142,16 @@ static int test_membarrier(void)
 	status = test_membarrier_flags_fail();
 	if (status)
 		return status;
-	status = test_membarrier_success();
+	status = test_membarrier_shared_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_fail();
+	if (status)
+		return status;
+	status = test_membarrier_register_private_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_success();
 	if (status)
 		return status;
 	return 0;
@@ -108,5 +187,5 @@ int main(int argc, char **argv)
 	test_membarrier_query();
 	test_membarrier();
 
-	ksft_exit_pass();
+	return ksft_exit_pass();
 }
-- 
cgit v1.2.3


From 92485487ba834f6665ec13dfb2c69e80cd0c7c37 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 29 Jan 2018 15:20:14 -0500
Subject: membarrier/selftest: Test global expedited command

Test the new MEMBARRIER_CMD_GLOBAL_EXPEDITED and
MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED commands.

Adapt to the MEMBARRIER_CMD_SHARED -> MEMBARRIER_CMD_GLOBAL rename.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alice Ferrazzi <alice.ferrazzi@gmail.com>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Elder <paul.elder@pitt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-6-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 .../testing/selftests/membarrier/membarrier_test.c | 59 ++++++++++++++++++++--
 1 file changed, 54 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index e6ee73d01fa1..eec39a0e1f77 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -59,10 +59,10 @@ static int test_membarrier_flags_fail(void)
 	return 0;
 }
 
-static int test_membarrier_shared_success(void)
+static int test_membarrier_global_success(void)
 {
-	int cmd = MEMBARRIER_CMD_SHARED, flags = 0;
-	const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED";
+	int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
 
 	if (sys_membarrier(cmd, flags) != 0) {
 		ksft_exit_fail_msg(
@@ -132,6 +132,40 @@ static int test_membarrier_private_expedited_success(void)
 	return 0;
 }
 
+static int test_membarrier_register_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
 static int test_membarrier(void)
 {
 	int status;
@@ -142,7 +176,7 @@ static int test_membarrier(void)
 	status = test_membarrier_flags_fail();
 	if (status)
 		return status;
-	status = test_membarrier_shared_success();
+	status = test_membarrier_global_success();
 	if (status)
 		return status;
 	status = test_membarrier_private_expedited_fail();
@@ -152,6 +186,19 @@ static int test_membarrier(void)
 	if (status)
 		return status;
 	status = test_membarrier_private_expedited_success();
+	if (status)
+		return status;
+	/*
+	 * It is valid to send a global membarrier from a non-registered
+	 * process.
+	 */
+	status = test_membarrier_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_register_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_global_expedited_success();
 	if (status)
 		return status;
 	return 0;
@@ -173,8 +220,10 @@ static int test_membarrier_query(void)
 		}
 		ksft_exit_fail_msg("sys_membarrier() failed\n");
 	}
-	if (!(ret & MEMBARRIER_CMD_SHARED))
+	if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
+		ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n");
 		ksft_exit_fail_msg("sys_membarrier is not supported.\n");
+	}
 
 	ksft_test_result_pass("sys_membarrier available\n");
 	return 0;
-- 
cgit v1.2.3


From 460e8c3340a265d1d70fa1ee05c42afd68b2efa0 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 29 Jan 2018 15:20:20 -0500
Subject: membarrier/selftest: Test private expedited sync core command

Test the new MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE and
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE commands.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alice Ferrazzi <alice.ferrazzi@gmail.com>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Elder <paul.elder@pitt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-12-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 .../testing/selftests/membarrier/membarrier_test.c | 73 ++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index eec39a0e1f77..22bffd55a523 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -132,6 +132,63 @@ static int test_membarrier_private_expedited_success(void)
 	return 0;
 }
 
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
 static int test_membarrier_register_global_expedited_success(void)
 {
 	int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
@@ -188,6 +245,22 @@ static int test_membarrier(void)
 	status = test_membarrier_private_expedited_success();
 	if (status)
 		return status;
+	status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+	if (status < 0) {
+		ksft_test_result_fail("sys_membarrier() failed\n");
+		return status;
+	}
+	if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+		status = test_membarrier_private_expedited_sync_core_fail();
+		if (status)
+			return status;
+		status = test_membarrier_register_private_expedited_sync_core_success();
+		if (status)
+			return status;
+		status = test_membarrier_private_expedited_sync_core_success();
+		if (status)
+			return status;
+	}
 	/*
 	 * It is valid to send a global membarrier from a non-registered
 	 * process.
-- 
cgit v1.2.3


From da6f8320d58623eae9b6fa2f09b1b4f60a772ce9 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 4 Jan 2018 10:06:38 -0800
Subject: ACPICA: All acpica: Update copyrights to 2018

including tool signons.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/acpi/common/cmfsize.c                        | 2 +-
 tools/power/acpi/common/getopt.c                         | 2 +-
 tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixdir.c  | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixmap.c  | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixxf.c   | 2 +-
 tools/power/acpi/tools/acpidump/acpidump.h               | 2 +-
 tools/power/acpi/tools/acpidump/apdump.c                 | 2 +-
 tools/power/acpi/tools/acpidump/apfiles.c                | 2 +-
 tools/power/acpi/tools/acpidump/apmain.c                 | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 5b38dc2fec4f..dfa6fee1b915 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 6e78413bb2cb..f7032c9ab35e 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 52a39ecf5ca1..e7347edfd4f9 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index ea14eaeb268f..9b5e61b636d9 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index cf9b5a54df92..8b26924e2602 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 025c1b07049d..34c044da433c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index d6aa40fce2b1..3c679607d1c3 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 0634449156d8..9ad1712e4cf9 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index d686e11936c4..856e1b83407b 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index be418fba9441..f4ef826800cf 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2017, Intel Corp.
+ * Copyright (C) 2000 - 2018, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
-- 
cgit v1.2.3


From 7b4eb53d95c30bdc55c44647c6968f24227fd1ba Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 5 Feb 2018 16:25:24 -0800
Subject: tools/bpf: fix batch-mode test failure of test_xdp_redirect.sh

The tests at tools/testing/selftests/bpf can run in patch mode, e.g.,
    make -C tools/testing/selftests/bpf run_tests

With the batch mode, I experimented intermittent test failure of
test_xdp_redirect.sh.
    ....
    selftests: test_xdp_redirect [PASS]
    selftests: test_xdp_redirect.sh [PASS]
    RTNETLINK answers: File exists
    selftests: test_xdp_meta [FAILED]
    selftests: test_xdp_meta.sh [FAIL]
    ....

The following illustrates what caused the failure:
     (1). test_xdp_redirect creates veth pairs (veth1,veth11) and
          (veth2,veth22), and assign veth11 and veth22 to namespace
          ns1 and ns2 respectively.
     (2). at the end of test_xdp_redirect test, ns1 and ns2 are
          deleted. During this process, the deletion of actual
          namespace resources, including deletion of veth1{1} and veth2{2},
          is put into a workqueue to be processed asynchronously.
     (3). test_xdp_meta tries to create veth pair (veth1, veth2).
          The previous veth deletions in step (2) have not finished yet,
          and veth1 or veth2 may be still valid in the kernel, thus
          causing the failure.

The fix is to explicitly delete the veth pair before test_xdp_redirect
exits. Only one end of veth needs deletion as the kernel will delete
the other end automatically. Also test_xdp_meta is also fixed in
similar manner to avoid future potential issues.

Fixes: 996139e801fd ("selftests: bpf: add a test for XDP redirect")
Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_xdp_meta.sh     | 1 +
 tools/testing/selftests/bpf/test_xdp_redirect.sh | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
index 307aa856cee3..637fcf4fe4e3 100755
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -9,6 +9,7 @@ cleanup()
 	fi
 
 	set +e
+	ip link del veth1 2> /dev/null
 	ip netns del ns1 2> /dev/null
 	ip netns del ns2 2> /dev/null
 }
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index 344a3656dea6..c4b17e08d431 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -19,6 +19,8 @@ cleanup()
 	fi
 
 	set +e
+	ip link del veth1 2> /dev/null
+	ip link del veth2 2> /dev/null
 	ip netns del ns1 2> /dev/null
 	ip netns del ns2 2> /dev/null
 }
-- 
cgit v1.2.3


From 051803c0d01f4d8a197ecc08bfeffdff00d86c62 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 3 Nov 2017 23:06:32 -0400
Subject: radix tree test suite: Remove ARRAY_SIZE

This is now defined in tools/include/linux/kernel.h, so our
definition generates a warning.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/linux/kernel.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c3bc3f364f68..426f32f28547 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -17,6 +17,4 @@
 #define pr_debug printk
 #define pr_cont printk
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 #endif /* _KERNEL_H */
-- 
cgit v1.2.3


From 490645d027c5925b30c88b9c7a663850a641d15d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 9 Nov 2017 20:15:14 -0500
Subject: idr test suite: Fix ida_test_random()

The test was checking the wrong errno; ida_get_new_above() returns
EAGAIN, not ENOMEM on memory allocation failure.  Double the number of
threads to increase the chance that we actually exercise this path
during the test suite (it was a bit sporadic before).

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 30cd0b296f1a..193450b29bf0 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -380,7 +380,7 @@ void ida_check_random(void)
 			do {
 				ida_pre_get(&ida, GFP_KERNEL);
 				err = ida_get_new_above(&ida, bit, &id);
-			} while (err == -ENOMEM);
+			} while (err == -EAGAIN);
 			assert(!err);
 			assert(id == bit);
 		}
@@ -489,7 +489,7 @@ static void *ida_random_fn(void *arg)
 
 void ida_thread_tests(void)
 {
-	pthread_t threads[10];
+	pthread_t threads[20];
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(threads); i++)
-- 
cgit v1.2.3


From 6e6d301490936789ff57daaaaf63f44d928a4028 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Tue, 28 Nov 2017 14:27:14 -0500
Subject: IDR test suite: Check handling negative end correctly

One of the charming quirks of the idr_alloc() interface is that you
can pass a negative end and it will be interpreted as "maximum".  Ensure
we don't break that.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 193450b29bf0..892ef8855b02 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -207,6 +207,7 @@ void idr_checks(void)
 		assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
 	}
 	assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+	assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC);
 
 	idr_for_each(&idr, item_idr_free, &idr);
 	idr_destroy(&idr);
-- 
cgit v1.2.3


From 460488c58ca8b9167463ac22ec9a2e33db351962 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Tue, 28 Nov 2017 15:16:24 -0500
Subject: idr: Remove idr_alloc_ext

It has no more users, so remove it.  Move idr_alloc() back into idr.c,
move the guts of idr_alloc_cmn() into idr_alloc_u32(), remove the
wrappers around idr_get_free_cmn() and rename it to idr_get_free().
While there is now no interface to allocate IDs larger than a u32,
the IDR internals remain ready to handle a larger ID should a need arise.

These changes make it possible to provide the guarantee that, if the
nextid pointer points into the object, the object's ID will be initialised
before a concurrent lookup can find the object.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 892ef8855b02..36437ade429c 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -215,6 +215,23 @@ void idr_checks(void)
 
 	assert(idr_is_empty(&idr));
 
+	idr_set_cursor(&idr, INT_MAX - 3UL);
+	for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) {
+		struct item *item;
+		unsigned int id;
+		if (i <= INT_MAX)
+			item = item_create(i, 0);
+		else
+			item = item_create(i - INT_MAX - 1, 0);
+
+		id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL);
+		assert(id == item->index);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
 	for (i = 1; i < 10000; i++) {
 		struct item *item = item_create(i, 0);
 		assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
-- 
cgit v1.2.3


From 6ce711f2750031d12cec91384ac5cfa0a485b60a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 30 Nov 2017 13:45:11 -0500
Subject: idr: Make 1-based IDRs more efficient

About 20% of the IDR users in the kernel want the allocated IDs to start
at 1.  The implementation currently searches all the way down the left
hand side of the tree, finds no free ID other than ID 0, walks all the
way back up, and then all the way down again.  This patch 'rebases' the
ID so we fill the entire radix tree, rather than leave a gap at 0.

Chris Wilson says: "I did the quick hack of allocating index 0 of the
idr and that eradicated idr_get_free() from being at the top of the
profiles for the many-object stress tests. This improvement will be
much appreciated."

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 36437ade429c..44ef9eba5a7a 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -153,11 +153,12 @@ void idr_nowait_test(void)
 	idr_destroy(&idr);
 }
 
-void idr_get_next_test(void)
+void idr_get_next_test(int base)
 {
 	unsigned long i;
 	int nextid;
 	DEFINE_IDR(idr);
+	idr_init_base(&idr, base);
 
 	int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
 
@@ -244,7 +245,9 @@ void idr_checks(void)
 	idr_alloc_test();
 	idr_null_test();
 	idr_nowait_test();
-	idr_get_next_test();
+	idr_get_next_test(0);
+	idr_get_next_test(1);
+	idr_get_next_test(4);
 }
 
 /*
-- 
cgit v1.2.3


From ad343a98e74e85aa91d844310e797f96fee6983b Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Tue, 6 Feb 2018 15:37:52 -0800
Subject: tools/lib/subcmd/pager.c: do not alias select() params

Use a separate fd set for select()-s exception fds param to fix the
following gcc warning:

  pager.c:36:12: error: passing argument 2 to restrict-qualified parameter aliases with argument 4 [-Werror=restrict]
    select(1, &in, NULL, &in, NULL);
              ^~~        ~~~

Link: http://lkml.kernel.org/r/20180101105626.7168-1-sergey.senozhatsky@gmail.com
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/lib/subcmd/pager.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
index 5ba754d17952..9997a8805a82 100644
--- a/tools/lib/subcmd/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -30,10 +30,13 @@ static void pager_preexec(void)
 	 * have real input
 	 */
 	fd_set in;
+	fd_set exception;
 
 	FD_ZERO(&in);
+	FD_ZERO(&exception);
 	FD_SET(0, &in);
-	select(1, &in, NULL, &in, NULL);
+	FD_SET(0, &exception);
+	select(1, &in, NULL, &exception, NULL);
 
 	setenv("LESS", "FRSX", 0);
 }
-- 
cgit v1.2.3


From 0ade34c37012ea5c516d9aa4d19a56e9f40a55ed Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet@google.com>
Date: Tue, 6 Feb 2018 15:38:34 -0800
Subject: lib: optimize cpumask_next_and()

We've measured that we spend ~0.6% of sys cpu time in cpumask_next_and().
It's essentially a joined iteration in search for a non-zero bit, which is
currently implemented as a lookup join (find a nonzero bit on the lhs,
lookup the rhs to see if it's set there).

Implement a direct join (find a nonzero bit on the incrementally built
join).  Also add generic bitmap benchmarks in the new `test_find_bit`
module for new function (see `find_next_and_bit` in [2] and [3] below).

For cpumask_next_and, direct benchmarking shows that it's 1.17x to 14x
faster with a geometric mean of 2.1 on 32 CPUs [1].  No impact on memory
usage.  Note that on Arm, the new pure-C implementation still outperforms
the old one that uses a mix of C and asm (`find_next_bit`) [3].

[1] Approximate benchmark code:

```
  unsigned long src1p[nr_cpumask_longs] = {pattern1};
  unsigned long src2p[nr_cpumask_longs] = {pattern2};
  for (/*a bunch of repetitions*/) {
    for (int n = -1; n <= nr_cpu_ids; ++n) {
      asm volatile("" : "+rm"(src1p)); // prevent any optimization
      asm volatile("" : "+rm"(src2p));
      unsigned long result = cpumask_next_and(n, src1p, src2p);
      asm volatile("" : "+rm"(result));
    }
  }
```

Results:
pattern1    pattern2     time_before/time_after
0x0000ffff  0x0000ffff   1.65
0x0000ffff  0x00005555   2.24
0x0000ffff  0x00001111   2.94
0x0000ffff  0x00000000   14.0
0x00005555  0x0000ffff   1.67
0x00005555  0x00005555   1.71
0x00005555  0x00001111   1.90
0x00005555  0x00000000   6.58
0x00001111  0x0000ffff   1.46
0x00001111  0x00005555   1.49
0x00001111  0x00001111   1.45
0x00001111  0x00000000   3.10
0x00000000  0x0000ffff   1.18
0x00000000  0x00005555   1.18
0x00000000  0x00001111   1.17
0x00000000  0x00000000   1.25
-----------------------------
               geo.mean  2.06

[2] test_find_next_bit, X86 (skylake)

 [ 3913.477422] Start testing find_bit() with random-filled bitmap
 [ 3913.477847] find_next_bit: 160868 cycles, 16484 iterations
 [ 3913.477933] find_next_zero_bit: 169542 cycles, 16285 iterations
 [ 3913.478036] find_last_bit: 201638 cycles, 16483 iterations
 [ 3913.480214] find_first_bit: 4353244 cycles, 16484 iterations
 [ 3913.480216] Start testing find_next_and_bit() with random-filled
 bitmap
 [ 3913.481074] find_next_and_bit: 89604 cycles, 8216 iterations
 [ 3913.481075] Start testing find_bit() with sparse bitmap
 [ 3913.481078] find_next_bit: 2536 cycles, 66 iterations
 [ 3913.481252] find_next_zero_bit: 344404 cycles, 32703 iterations
 [ 3913.481255] find_last_bit: 2006 cycles, 66 iterations
 [ 3913.481265] find_first_bit: 17488 cycles, 66 iterations
 [ 3913.481266] Start testing find_next_and_bit() with sparse bitmap
 [ 3913.481272] find_next_and_bit: 764 cycles, 1 iterations

[3] test_find_next_bit, arm (v7 odroid XU3).

[  267.206928] Start testing find_bit() with random-filled bitmap
[  267.214752] find_next_bit: 4474 cycles, 16419 iterations
[  267.221850] find_next_zero_bit: 5976 cycles, 16350 iterations
[  267.229294] find_last_bit: 4209 cycles, 16419 iterations
[  267.279131] find_first_bit: 1032991 cycles, 16420 iterations
[  267.286265] Start testing find_next_and_bit() with random-filled
bitmap
[  267.302386] find_next_and_bit: 2290 cycles, 8140 iterations
[  267.309422] Start testing find_bit() with sparse bitmap
[  267.316054] find_next_bit: 191 cycles, 66 iterations
[  267.322726] find_next_zero_bit: 8758 cycles, 32703 iterations
[  267.329803] find_last_bit: 84 cycles, 66 iterations
[  267.336169] find_first_bit: 4118 cycles, 66 iterations
[  267.342627] Start testing find_next_and_bit() with sparse bitmap
[  267.356919] find_next_and_bit: 91 cycles, 1 iterations

[courbet@google.com: v6]
  Link: http://lkml.kernel.org/r/20171129095715.23430-1-courbet@google.com
[geert@linux-m68k.org: m68k/bitops: always include <asm-generic/bitops/find.h>]
  Link: http://lkml.kernel.org/r/1512556816-28627-1-git-send-email-geert@linux-m68k.org
Link: http://lkml.kernel.org/r/20171128131334.23491-1-courbet@google.com
Signed-off-by: Clement Courbet <courbet@google.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Yury Norov <ynorov@caviumnetworks.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/include/asm-generic/bitops/find.h | 16 ++++++++++++++
 tools/lib/find_bit.c                    | 39 ++++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
index 9311fadaaab2..16ed1982cb34 100644
--- a/tools/include/asm-generic/bitops/find.h
+++ b/tools/include/asm-generic/bitops/find.h
@@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
 		size, unsigned long offset);
 #endif
 
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset);
+#endif
+
 #ifndef find_next_zero_bit
 
 /**
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 42c15f906aac..a88bd507091e 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -22,22 +22,29 @@
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
 
-#if !defined(find_next_bit)
+#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
+		!defined(find_next_and_bit)
 
 /*
- * This is a common helper function for find_next_bit and
- * find_next_zero_bit.  The difference is the "invert" argument, which
- * is XORed with each fetched word before searching it for one bits.
+ * This is a common helper function for find_next_bit, find_next_zero_bit, and
+ * find_next_and_bit. The differences are:
+ *  - The "invert" argument, which is XORed with each fetched word before
+ *    searching it for one bits.
+ *  - The optional "addr2", which is anded with "addr1" if present.
  */
-static unsigned long _find_next_bit(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, unsigned long invert)
+static inline unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;
 
 	if (unlikely(start >= nbits))
 		return nbits;
 
-	tmp = addr[start / BITS_PER_LONG] ^ invert;
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
 
 	/* Handle 1st word. */
 	tmp &= BITMAP_FIRST_WORD_MASK(start);
@@ -48,7 +55,10 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 		if (start >= nbits)
 			return nbits;
 
-		tmp = addr[start / BITS_PER_LONG] ^ invert;
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
 	}
 
 	return min(start + __ffs(tmp), nbits);
@@ -62,7 +72,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	return _find_next_bit(addr, size, offset, 0UL);
+	return _find_next_bit(addr, NULL, size, offset, 0UL);
 }
 #endif
 
@@ -104,6 +114,15 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	return _find_next_bit(addr, size, offset, ~0UL);
+	return _find_next_bit(addr, NULL, size, offset, ~0UL);
+}
+#endif
+
+#ifndef find_next_and_bit
+unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	return _find_next_bit(addr1, addr2, size, offset, 0UL);
 }
 #endif
-- 
cgit v1.2.3


From 035d808f7cfd578dc210b584beb17e365f34d39a Mon Sep 17 00:00:00 2001
From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Wed, 7 Feb 2018 23:45:34 +0530
Subject: selftests: bpf: test_kmod.sh: check the module path before insmod

test_kmod.sh reported false failure when module not present.
Check test_bpf.ko is present in the path before loading it.

Two cases to be addressed here,
In the development process of test_bpf.c unit testing will be done by
developers by using "insmod $SRC_TREE/lib/test_bpf.ko"

On the other hand testers run full tests by installing modules on device
under test (DUT) and followed by modprobe to insert the modules accordingly.

Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_kmod.sh | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index ed4774d8d6ed..35669ccd4d23 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -10,9 +10,21 @@ test_run()
 
 	echo "[ JIT enabled:$1 hardened:$2 ]"
 	dmesg -C
-	insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null
-	if [ $? -ne 0 ]; then
-		rc=1
+	if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
+		insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+		if [ $? -ne 0 ]; then
+			rc=1
+		fi
+	else
+		# Use modprobe dry run to check for missing test_bpf module
+		if ! /sbin/modprobe -q -n test_bpf; then
+			echo "test_bpf: [SKIP]"
+		elif /sbin/modprobe -q test_bpf; then
+			echo "test_bpf: ok"
+		else
+			echo "test_bpf: [FAIL]"
+			rc=1
+		fi
 	fi
 	rmmod  test_bpf 2> /dev/null
 	dmesg | grep FAIL
-- 
cgit v1.2.3


From 0badd331491097cc3702d05a6dd0264a434712b2 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Feb 2018 20:27:13 -0800
Subject: libbpf: complete list of strings for guessing program type

It seems that the type guessing feature for libbpf, based on the name of
the ELF section the program is located in, was inspired from
samples/bpf/prog_load.c, which was not used by any sample for loading
programs of certain types such as TC actions and classifiers, or
LWT-related types. As a consequence, libbpf is not able to guess the
type of such programs and to load them automatically if type is not
provided to the `bpf_load_prog()` function.

Add ELF section names associated to those eBPF program types so that
they can be loaded with e.g. bpftool as well.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 71ddc481f349..c64840365433 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1816,12 +1816,17 @@ static const struct {
 	BPF_PROG_SEC("socket",		BPF_PROG_TYPE_SOCKET_FILTER),
 	BPF_PROG_SEC("kprobe/",		BPF_PROG_TYPE_KPROBE),
 	BPF_PROG_SEC("kretprobe/",	BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("classifier",	BPF_PROG_TYPE_SCHED_CLS),
+	BPF_PROG_SEC("action",		BPF_PROG_TYPE_SCHED_ACT),
 	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
 	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
 	BPF_PROG_SEC("cgroup/sock",	BPF_PROG_TYPE_CGROUP_SOCK),
 	BPF_PROG_SEC("cgroup/dev",	BPF_PROG_TYPE_CGROUP_DEVICE),
+	BPF_PROG_SEC("lwt_in",		BPF_PROG_TYPE_LWT_IN),
+	BPF_PROG_SEC("lwt_out",		BPF_PROG_TYPE_LWT_OUT),
+	BPF_PROG_SEC("lwt_xmit",	BPF_PROG_TYPE_LWT_XMIT),
 	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
 	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
 };
-- 
cgit v1.2.3


From 92426820f7616caebdf6ba665f749102a670b3d4 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Feb 2018 20:27:14 -0800
Subject: tools: bpftool: exit doc Makefile early if rst2man is not available

If rst2man is not available on the system, running `make doc` from the
bpftool directory fails with an error message. However, it creates empty
manual pages (.8 files in this case). A subsequent call to `make
doc-install` would then succeed and install those empty man pages on the
system.

To prevent this, raise a Makefile error and exit immediately if rst2man
is not available before generating the pages from the rst documentation.

Fixes: ff69c21a85a4 ("tools: bpftool: add documentation")
Reported-by: Jason van Aaardt <jason.vanaardt@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index c462a928e03d..a9d47c1558bb 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -23,7 +23,12 @@ DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
 man: man8
 man8: $(DOC_MAN8)
 
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
 $(OUTPUT)%.8: %.rst
+ifndef RST2MAN_DEP
+	$(error "rst2man not found, but required to generate man pages")
+endif
 	$(QUIET_GEN)rst2man $< > $@
 
 clean:
-- 
cgit v1.2.3


From 2148481d5d3111e5e9aa5adc1905fa3539e548c8 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Feb 2018 20:27:15 -0800
Subject: tools: bpftool: make syntax for program map update explicit in man
 page

Specify in the documentation that when using bpftool to update a map of
type BPF_MAP_TYPE_PROG_ARRAY, the syntax for the program used as a value
should use the "id|tag|pinned" keywords convention, as used with
"bpftool prog" commands.

Fixes: ff69c21a85a4 ("tools: bpftool: add documentation")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 0ab32b312aec..457e868bd32f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -31,7 +31,8 @@ MAP COMMANDS
 |	**bpftool** **map help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
-|	*VALUE* := { *BYTES* | *MAP* | *PROGRAM* }
+|	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+|	*VALUE* := { *BYTES* | *MAP* | *PROG* }
 |	*UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
 
 DESCRIPTION
-- 
cgit v1.2.3


From 55f3538c4923e9dfca132e99ebec370e8094afda Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Feb 2018 20:27:16 -0800
Subject: tools: bpftool: add bash completion for `bpftool prog load`

Add bash completion for bpftool command `prog load`. Completion for this
command is easy, as it only takes existing file paths as arguments.

Fixes: 49a086c201a9 ("bpftool: implement prog load command")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/bash-completion/bpftool | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 0137866bb8f6..17d246418348 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -230,10 +230,14 @@ _bpftool()
                     fi
                     return 0
                     ;;
+                load)
+                    _filedir
+                    return 0
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin show list' -- \
-                            "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'dump help pin load \
+                            show list' -- "$cur" ) )
                     ;;
             esac
             ;;
-- 
cgit v1.2.3


From a827a164b98be2acba6a2e7559643ac9a5745086 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 7 Feb 2018 20:27:17 -0800
Subject: tools: bpftool: add bash completion for cgroup commands

Add bash completion for "bpftool cgroup" command family. While at it,
also fix the formatting of some keywords in the man page for cgroups.

Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst |  4 +-
 tools/bpf/bpftool/bash-completion/bpftool          | 64 ++++++++++++++++++++--
 2 files changed, 62 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 2fe2a1bdbe3e..0e4e923235b6 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -26,8 +26,8 @@ MAP COMMANDS
 |	**bpftool** **cgroup help**
 |
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-|	*ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* }
-|	*ATTACH_FLAGS* := { *multi* | *override* }
+|	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** }
+|	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 17d246418348..08719c54a614 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -52,16 +52,24 @@ _bpftool_once_attr()
     done
 }
 
-# Takes a list of words in argument; adds them all to COMPREPLY if none of them
-# is already present on the command line. Returns no value.
-_bpftool_one_of_list()
+# Takes a list of words as argument; if any of those words is present on the
+# command line, return 0. Otherwise, return 1.
+_bpftool_search_list()
 {
     local w idx
     for w in $*; do
         for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
-            [[ $w == ${words[idx]} ]] && return 1
+            [[ $w == ${words[idx]} ]] && return 0
         done
     done
+    return 1
+}
+
+# Takes a list of words in argument; adds them all to COMPREPLY if none of them
+# is already present on the command line. Returns no value.
+_bpftool_one_of_list()
+{
+    _bpftool_search_list $* && return 1
     COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) )
 }
 
@@ -351,6 +359,54 @@ _bpftool()
                     ;;
             esac
             ;;
+        cgroup)
+            case $command in
+                show|list)
+                    _filedir
+                    return 0
+                    ;;
+                attach|detach)
+                    local ATTACH_TYPES='ingress egress sock_create sock_ops \
+                        device'
+                    local ATTACH_FLAGS='multi override'
+                    local PROG_TYPE='id pinned tag'
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        ingress|egress|sock_create|sock_ops|device)
+                            COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
+                                "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            _bpftool_get_prog_ids
+                            return 0
+                            ;;
+                        *)
+                            if ! _bpftool_search_list "$ATTACH_TYPES"; then
+                                COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
+                                    "$cur" ) )
+                            elif [[ "$command" == "attach" ]]; then
+                                # We have an attach type on the command line,
+                                # but it is not the previous word, or
+                                # "id|pinned|tag" (we already checked for
+                                # that). This should only leave the case when
+                                # we need attach flags for "attach" commamnd.
+                                _bpftool_one_of_list "$ATTACH_FLAGS"
+                            fi
+                            return 0
+                            ;;
+                    esac
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help attach detach \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
-- 
cgit v1.2.3


From df9d36d9e8adefe0975c7b5888967651b3db3af3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 6 Feb 2018 17:10:12 -0500
Subject: selftests/ftrace: Have reset_ftrace_filter handle modules

If a function probe in set_ftrace_filter belongs to a module, it will
contain the module name. Like:

 wmi_query_block [wmi]:traceoff:unlimited

But writing:

 '!wmi_query_block [wmi]:traceoff' > set_ftrace_filter

will cause an error. We still need to write:

 '!wmi_query_block:traceoff' > set_ftrace_filter

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/selftests/ftrace/test.d/functions | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index f2019b37370d..e7c4c7b752a2 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -37,17 +37,18 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
 	if [ "$tr" = "" ]; then
 	    continue
 	fi
+	name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
 	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
-	    tr=`echo $t | cut -d: -f1-4`
+	    tr=`echo $t | cut -d: -f2-4`
 	    limit=`echo $t | cut -d: -f5`
 	else
-	    tr=`echo $t | cut -d: -f1-2`
+	    tr=`echo $t | cut -d: -f2`
 	    limit=`echo $t | cut -d: -f3`
 	fi
 	if [ "$limit" != "unlimited" ]; then
 	    tr="$tr:$limit"
 	fi
-	echo "!$tr" > set_ftrace_filter
+	echo "!$name:$tr" > set_ftrace_filter
     done
 }
 
-- 
cgit v1.2.3


From 0787ce336075ac0fa2d356de4e3c2a2488e851a6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 6 Feb 2018 17:15:02 -0500
Subject: selftests/ftrace: Have reset_ftrace_filter handle multiple instances

If a probe is attached to a static function that is in multiple files with
the same name, removing it by name will remove all instances:

 # grep jump_label_unlock set_ftrace_filter
jump_label_unlock:traceoff:unlimited
jump_label_unlock:traceoff:unlimited

 # echo '!jump_label_unlock:traceoff' >> set_ftrace_filter
 # grep jump_label_unlock set_ftrace_filter
 #

But the loop in reset_ftrace_filter will try to remove multiple instances
multiple times. If this happens the second time will error and cause the
test to fail.

At each iteration of the loop, check to see if the probe being removed still
exists.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/selftests/ftrace/test.d/functions | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index e7c4c7b752a2..df3dd7fe5f9b 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -37,6 +37,9 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
 	if [ "$tr" = "" ]; then
 	    continue
 	fi
+	if ! grep -q "$t" set_ftrace_filter; then
+		continue;
+	fi
 	name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
 	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
 	    tr=`echo $t | cut -d: -f2-4`
-- 
cgit v1.2.3


From 97fe22adf33f06519bfdf7dad33bcd562e366c8f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 6 Feb 2018 17:19:03 -0500
Subject: selftests/ftrace: Add some missing glob checks

Al Viro discovered a bug in the glob ftrace filtering code where "*a*b" is
treated the same as "a*b", and functions that would be selected by "*a*b"
but not "a*b" are not selected with "*a*b".

Add tests for patterns "*a*b" and "a*b*" to the glob selftest.

Link: http://lkml.kernel.org/r/20180127170748.GF13338@ZenIV.linux.org.uk

Cc: Shuah Khan <shuah@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 589d52b211b7..27a54a17da65 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -29,6 +29,12 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$'
 # filter by *, end match
 ftrace_filter_check 'schedule*' '^schedule.*$'
 
+# filter by *mid*end
+ftrace_filter_check '*aw*lock' '.*aw.*lock$'
+
+# filter by start*mid*
+ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+
 # Advanced full-glob matching feature is recently supported.
 # Skip the tests if we are sure the kernel does not support it.
 if grep -q 'accepts: .* glob-matching-pattern' README ; then
-- 
cgit v1.2.3


From 878cb3fb06c67e8f1a452346e0bc6bb85f29b0a0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 6 Feb 2018 17:28:36 -0500
Subject: selftests/ftrace: Add more tests for removing of function probes

Al Viro discovered a bug in the removing of function probes where if it had
a '*' at the beginning, it would fail to find any matches. That is, because
it reset the glob search string to the the initial string with a "MATCH_END"
type, instead of skipping the wildcard "*" it included it, where it would
not match any functions because "*" was being treated as a normal character
and not a wildcard one.

Link: http://lkml.kernel.org/r/20180127031706.GE13338@ZenIV.linux.org.uk

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../ftrace/test.d/ftrace/func_set_ftrace_file.tc   | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 0f3f92622e33..68e7a48f5828 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -128,6 +128,43 @@ if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then
     fail "Expected $FUNC1 and $FUNC2"
 fi
 
+test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter
+    cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual
+    DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+    test -z "$DIFF"
+}
+
+# Set traceoff trigger for all fuctions with "lock" in their name
+cat available_filter_functions | cut -d' ' -f1 |  grep 'lock' | sort -u > $TMPDIR/expected
+echo '*lock*:traceoff' > set_ftrace_filter
+test_actual
+
+# now remove all with 'try' in it, and end with lock
+grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!*try*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "m" and end with "lock"
+grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!m*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "c" and have "unlock"
+grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!c*unlock*:traceoff' >> set_ftrace_filter
+test_actual
+
+# clear all the rest
+> $TMPDIR/expected
+echo '!*:traceoff' >> set_ftrace_filter
+test_actual
+
+rm $TMPDIR/expected
+rm $TMPDIR/actual
+
 do_reset
 
 exit 0
-- 
cgit v1.2.3


From 8c88181ed452707f3815827225517b1964463b95 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:12 +0100
Subject: bpf: Sync kernel ABI header with tooling header for bpf_common.h

I recently fixed up a lot of commits that forgot to keep the tooling
headers in sync.  And then I forgot to do the same thing in commit
cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes"). Let correct
that before people notice ;-).

Lawrence did partly fix/sync this for bpf.h in commit d6d4f60c3a09
("bpf: add selftest for tcpbpf").

Fixes: cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf_common.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h
index 18be90725ab0..ee97668bdadb 100644
--- a/tools/include/uapi/linux/bpf_common.h
+++ b/tools/include/uapi/linux/bpf_common.h
@@ -15,9 +15,10 @@
 
 /* ld/ldx fields */
 #define BPF_SIZE(code)  ((code) & 0x18)
-#define		BPF_W		0x00
-#define		BPF_H		0x08
-#define		BPF_B		0x10
+#define		BPF_W		0x00 /* 32-bit */
+#define		BPF_H		0x08 /* 16-bit */
+#define		BPF_B		0x10 /*  8-bit */
+/* eBPF		BPF_DW		0x18    64-bit */
 #define BPF_MODE(code)  ((code) & 0xe0)
 #define		BPF_IMM		0x00
 #define		BPF_ABS		0x20
-- 
cgit v1.2.3


From 077c066a6c5445423147496e6c9b9a2c2d2ee762 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:17 +0100
Subject: tools/libbpf: improve the pr_debug statements to contain section
 numbers

While debugging a bpf ELF loading issue, I needed to correlate the
ELF section number with the failed relocation section reference.
Thus, add section numbers/index to the pr_debug.

In debug mode, also print section that were skipped.  This helped
me identify that a section (.eh_frame) was skipped, and this was
the reason the relocation section (.rel.eh_frame) could not find
that section number.

The section numbers corresponds to the readelf tools Section Headers [Nr].

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c64840365433..0c794f7fc050 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -319,8 +319,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 
 	prog->section_name = strdup(section_name);
 	if (!prog->section_name) {
-		pr_warning("failed to alloc name for prog under section %s\n",
-			   section_name);
+		pr_warning("failed to alloc name for prog under section(%d) %s\n",
+			   idx, section_name);
 		goto errout;
 	}
 
@@ -763,29 +763,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 
 		idx++;
 		if (gelf_getshdr(scn, &sh) != &sh) {
-			pr_warning("failed to get section header from %s\n",
-				   obj->path);
+			pr_warning("failed to get section(%d) header from %s\n",
+				   idx, obj->path);
 			err = -LIBBPF_ERRNO__FORMAT;
 			goto out;
 		}
 
 		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
 		if (!name) {
-			pr_warning("failed to get section name from %s\n",
-				   obj->path);
+			pr_warning("failed to get section(%d) name from %s\n",
+				   idx, obj->path);
 			err = -LIBBPF_ERRNO__FORMAT;
 			goto out;
 		}
 
 		data = elf_getdata(scn, 0);
 		if (!data) {
-			pr_warning("failed to get section data from %s(%s)\n",
-				   name, obj->path);
+			pr_warning("failed to get section(%d) data from %s(%s)\n",
+				   idx, name, obj->path);
 			err = -LIBBPF_ERRNO__FORMAT;
 			goto out;
 		}
-		pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n",
-			 name, (unsigned long)data->d_size,
+		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+			 idx, name, (unsigned long)data->d_size,
 			 (int)sh.sh_link, (unsigned long)sh.sh_flags,
 			 (int)sh.sh_type);
 
@@ -840,6 +840,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 				obj->efile.reloc[n].shdr = sh;
 				obj->efile.reloc[n].data = data;
 			}
+		} else {
+			pr_debug("skip section(%d) %s\n", idx, name);
 		}
 		if (err)
 			goto out;
@@ -1119,8 +1121,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 
 		prog = bpf_object__find_prog_by_idx(obj, idx);
 		if (!prog) {
-			pr_warning("relocation failed: no %d section\n",
-				   idx);
+			pr_warning("relocation failed: no section(%d)\n", idx);
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-- 
cgit v1.2.3


From 864db336c677c288d81524c30a656804785902f9 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:22 +0100
Subject: selftests/bpf: add test program for loading BPF ELF files

V2: Moved program into selftests/bpf from tools/libbpf

This program can be used on its own for testing/debugging if a
BPF ELF-object file can be loaded with libbpf (from tools/lib/bpf).

If something is wrong with the ELF object, the program have
a --debug mode that will display the ELF sections and especially
the skipped sections.  This allows for quickly identifying the
problematic ELF section number, which can be corrolated with the
readelf tool.

The program signal error via return codes, and also have
a --quiet mode, which is practical for use in scripts like
selftests/bpf.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile           |   2 +-
 tools/testing/selftests/bpf/test_libbpf_open.c | 150 +++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_libbpf_open.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 566d6adc172a..3f48da0866ba 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -20,7 +20,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	sample_map_ret0.o test_tcpbpf_kern.o
+	sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open
 
 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
 	test_offload.py
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
new file mode 100644
index 000000000000..8fcd1c076add
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf_open.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ =
+	"Libbpf test program for loading BPF ELF object files";
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <bpf/libbpf.h>
+#include <getopt.h>
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"debug",	no_argument,		NULL, 'D' },
+	{"quiet",	no_argument,		NULL, 'q' },
+	{0, 0, NULL,  0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
+	printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		printf(" short-option: -%c",
+		       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+#define DEFINE_PRINT_FN(name, enabled) \
+static int libbpf_##name(const char *fmt, ...)  	\
+{							\
+        va_list args;					\
+        int ret;					\
+							\
+        va_start(args, fmt);				\
+	if (enabled) {					\
+		fprintf(stderr, "[" #name "] ");	\
+		ret = vfprintf(stderr, fmt, args);	\
+	}						\
+        va_end(args);					\
+        return ret;					\
+}
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
+
+#define EXIT_FAIL_LIBBPF EXIT_FAILURE
+#define EXIT_FAIL_OPTION 2
+
+int test_walk_progs(struct bpf_object *obj, bool verbose)
+{
+	struct bpf_program *prog;
+	int cnt = 0;
+
+	bpf_object__for_each_program(prog, obj) {
+		cnt++;
+		if (verbose)
+			printf("Prog (count:%d) section_name: %s\n", cnt,
+			       bpf_program__title(prog, false));
+	}
+	return 0;
+}
+
+int test_walk_maps(struct bpf_object *obj, bool verbose)
+{
+	struct bpf_map *map;
+	int cnt = 0;
+
+	bpf_map__for_each(map, obj) {
+		cnt++;
+		if (verbose)
+			printf("Map (count:%d) name: %s\n", cnt,
+			       bpf_map__name(map));
+	}
+	return 0;
+}
+
+int test_open_file(char *filename, bool verbose)
+{
+	struct bpf_object *bpfobj = NULL;
+	long err;
+
+	if (verbose)
+		printf("Open BPF ELF-file with libbpf: %s\n", filename);
+
+	/* Load BPF ELF object file and check for errors */
+	bpfobj = bpf_object__open(filename);
+	err = libbpf_get_error(bpfobj);
+	if (err) {
+		char err_buf[128];
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		if (verbose)
+			printf("Unable to load eBPF objects in file '%s': %s\n",
+			       filename, err_buf);
+		return EXIT_FAIL_LIBBPF;
+	}
+	test_walk_progs(bpfobj, verbose);
+	test_walk_maps(bpfobj, verbose);
+
+	if (verbose)
+		printf("Close BPF ELF-file with libbpf: %s\n",
+		       bpf_object__name(bpfobj));
+	bpf_object__close(bpfobj);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[1024] = { 0 };
+	bool verbose = 1;
+	int longindex = 0;
+	int opt;
+
+	libbpf_set_print(libbpf_warning, libbpf_info, NULL);
+
+	/* Parse commands line args */
+	while ((opt = getopt_long(argc, argv, "hDq",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'D':
+			libbpf_set_print(libbpf_warning, libbpf_info,
+					 libbpf_debug);
+			break;
+		case 'q': /* Use in scripting mode */
+			verbose = 0;
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+	if (optind >= argc) {
+		usage(argv);
+		printf("ERROR: Expected BPF_FILE argument after options\n");
+		return EXIT_FAIL_OPTION;
+	}
+	snprintf(filename, sizeof(filename), "%s", argv[optind]);
+
+	return test_open_file(filename, verbose);
+}
-- 
cgit v1.2.3


From f09b2e382e9a7053e3ae6f2fb6535efd5760cf5d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:27 +0100
Subject: selftests/bpf: add selftest that use test_libbpf_open

This script test_libbpf.sh will be part of the 'make run_tests'
invocation, but can also be invoked manually in this directory,
and a verbose mode can be enabled via setting the environment
variable $VERBOSE like:

 $ VERBOSE=yes ./test_libbpf.sh

The script contains some tests that are commented out, as they
currently fail.  They are reminders about what we need to improve
for the libbpf loader library.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile       | 14 +++++++--
 tools/testing/selftests/bpf/test_libbpf.sh | 49 ++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/test_libbpf.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3f48da0866ba..5c43c187f27c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,6 +13,7 @@ endif
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
+# Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
 
@@ -20,17 +21,26 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open
+	sample_map_ret0.o test_tcpbpf_kern.o
 
-TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
+# Order correspond to 'make run_tests' order
+TEST_PROGS := test_kmod.sh \
+	test_libbpf.sh \
+	test_xdp_redirect.sh \
+	test_xdp_meta.sh \
 	test_offload.py
 
+# Compile but not part of 'make run_tests'
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open
+
 include ../lib.mk
 
 BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
+$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+
 .PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
new file mode 100755
index 000000000000..d97dc914cd49
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+export TESTNAME=test_libbpf
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+	if (( $? == 0 )); then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+}
+
+libbpf_open_file()
+{
+	LAST_LOADED=$1
+	if [ -n "$VERBOSE" ]; then
+	    ./test_libbpf_open $1
+	else
+	    ./test_libbpf_open --quiet $1
+	fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+libbpf_open_file test_l4lb.o
+
+# TODO: fix libbpf to load noinline functions
+# [warning] libbpf: incorrect bpf_call opcode
+#libbpf_open_file test_l4lb_noinline.o
+
+# TODO: fix test_xdp_meta.c to load with libbpf
+# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version
+#libbpf_open_file test_xdp_meta.o
+
+# TODO: fix libbpf to handle .eh_frame
+# [warning] libbpf: relocation failed: no section(10)
+#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o
+
+# Success
+exit 0
-- 
cgit v1.2.3


From e3d91b0ca523d53158f435a3e13df7f0cb360ea2 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 8 Feb 2018 12:48:32 +0100
Subject: tools/libbpf: handle issues with bpf ELF objects containing
 .eh_frames

V3: More generic skipping of relo-section (suggested by Daniel)

If clang >= 4.0.1 is missing the option '-target bpf', it will cause
llc/llvm to create two ELF sections for "Exception Frames", with
section names '.eh_frame' and '.rel.eh_frame'.

The BPF ELF loader library libbpf fails when loading files with these
sections.  The other in-kernel BPF ELF loader in samples/bpf/bpf_load.c,
handle this gracefully. And iproute2 loader also seems to work with these
"eh" sections.

The issue in libbpf is caused by bpf_object__elf_collect() skipping
some sections, and later when performing relocation it will be
pointing to a skipped section, as these sections cannot be found by
bpf_object__find_prog_by_idx() in bpf_object__collect_reloc().

This is a general issue that also occurs for other sections, like
debug sections which are also skipped and can have relo section.

As suggested by Daniel.  To avoid keeping state about all skipped
sections, instead perform a direct qlookup in the ELF object.  Lookup
the section that the relo-section points to and check if it contains
executable machine instructions (denoted by the sh_flags
SHF_EXECINSTR).  Use this check to also skip irrelevant relo-sections.

Note, for samples/bpf/ the '-target bpf' parameter to clang cannot be used
due to incompatibility with asm embedded headers, that some of the samples
include. This is explained in more details by Yonghong Song in bpf_devel_QA.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0c794f7fc050..97073d649c1a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -742,6 +742,24 @@ bpf_object__init_maps(struct bpf_object *obj)
 	return 0;
 }
 
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+
+	scn = elf_getscn(obj->efile.elf, idx);
+	if (!scn)
+		return false;
+
+	if (gelf_getshdr(scn, &sh) != &sh)
+		return false;
+
+	if (sh.sh_flags & SHF_EXECINSTR)
+		return true;
+
+	return false;
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj)
 {
 	Elf *elf = obj->efile.elf;
@@ -825,6 +843,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		} else if (sh.sh_type == SHT_REL) {
 			void *reloc = obj->efile.reloc;
 			int nr_reloc = obj->efile.nr_reloc + 1;
+			int sec = sh.sh_info; /* points to other section */
+
+			/* Only do relo for section with exec instructions */
+			if (!section_have_execinstr(obj, sec)) {
+				pr_debug("skip relo %s(%d) for section(%d)\n",
+					 name, idx, sec);
+				continue;
+			}
 
 			reloc = realloc(reloc,
 					sizeof(*obj->efile.reloc) * nr_reloc);
-- 
cgit v1.2.3


From 99ce7962d52d1948ad6f2785e308d48e76e0a6ef Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 8 Feb 2018 14:02:32 +0100
Subject: objtool: Fix switch-table detection

Linus reported that GCC-7.3 generated a switch-table construct that
confused objtool. It turns out that, in particular due to KASAN, it is
possible to have unrelated .rodata usage in between the .rodata setup
for the switch-table and the following indirect jump.

The simple linear reverse search from the indirect jump would hit upon
the KASAN .rodata usage first and fail to find a switch_table,
resulting in a spurious 'sibling call with modified stack frame'
warning.

Fix this by creating a 'jump-stack' which we can 'unwind' during
reversal, thereby skipping over much of the in-between code.

This is not fool proof by any means, but is sufficient to make the
known cases work. Future work would be to construct more comprehensive
flow analysis code.

Reported-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180208130232.GF25235@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 41 +++++++++++++++++++++++++++++++++++++++--
 tools/objtool/check.h |  1 +
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9cd028aa1509..2e458eb45586 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -851,8 +851,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
  *    writing, there are 11 occurrences of it in the allmodconfig kernel.
  *
+ *    As of GCC 7 there are quite a few more of these and the 'in between' code
+ *    is significant. Esp. with KASAN enabled some of the code between the mov
+ *    and jmpq uses .rodata itself, which can confuse things.
+ *
  *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
  *    ensure the same register is used in the mov and jump instructions.
+ *
+ *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
 static struct rela *find_switch_table(struct objtool_file *file,
 				      struct symbol *func,
@@ -874,12 +880,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
 						text_rela->addend + 4);
 		if (!rodata_rela)
 			return NULL;
+
 		file->ignore_unreachables = true;
 		return rodata_rela;
 	}
 
 	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
+	/*
+	 * Backward search using the @first_jump_src links, these help avoid
+	 * much of the 'in between' code. Which avoids us getting confused by
+	 * it.
+	 */
+	for (insn = list_prev_entry(insn, list);
+
+	     &insn->list != &file->insn_list &&
+	     insn->sec == func->sec &&
+	     insn->offset >= func->offset;
+
+	     insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
 		if (insn->type == INSN_JUMP_DYNAMIC)
 			break;
 
@@ -909,14 +928,32 @@ static struct rela *find_switch_table(struct objtool_file *file,
 	return NULL;
 }
 
+
 static int add_func_switch_tables(struct objtool_file *file,
 				  struct symbol *func)
 {
-	struct instruction *insn, *prev_jump = NULL;
+	struct instruction *insn, *last = NULL, *prev_jump = NULL;
 	struct rela *rela, *prev_rela = NULL;
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
+		if (!last)
+			last = insn;
+
+		/*
+		 * Store back-pointers for unconditional forward jumps such
+		 * that find_switch_table() can back-track using those and
+		 * avoid some potentially confusing code.
+		 */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+		    insn->offset > last->offset &&
+		    insn->jump_dest->offset > insn->offset &&
+		    !insn->jump_dest->first_jump_src) {
+
+			insn->jump_dest->first_jump_src = insn;
+			last = insn->jump_dest;
+		}
+
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index dbadb304a410..23a1d065cae1 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -47,6 +47,7 @@ struct instruction {
 	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
+	struct instruction *first_jump_src;
 	struct list_head alts;
 	struct symbol *func;
 	struct stack_op stack_op;
-- 
cgit v1.2.3


From 941ff6f11c020913f5cddf543a9ec63475d7c082 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 9 Feb 2018 14:49:44 +0100
Subject: bpf: fix rlimit in reuseport net selftest

Fix two issues in the reuseport_bpf selftests that were
reported by Linaro CI:

  [...]
  + ./reuseport_bpf
  ---- IPv4 UDP ----
  Testing EBPF mod 10...
  Reprograming, testing mod 5...
  ./reuseport_bpf: ebpf error. log:
  0: (bf) r6 = r1
  1: (20) r0 = *(u32 *)skb[0]
  2: (97) r0 %= 10
  3: (95) exit
  processed 4 insns
  : Operation not permitted
  + echo FAIL
  [...]
  ---- IPv4 TCP ----
  Testing EBPF mod 10...
  ./reuseport_bpf: failed to bind send socket: Address already in use
  + echo FAIL
  [...]

For the former adjust rlimit since this was the cause of
failure for loading the BPF prog, and for the latter add
SO_REUSEADDR.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: https://bugs.linaro.org/show_bug.cgi?id=3502
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/reuseport_bpf.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 4a8217448f20..cad14cd0ea92 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -21,6 +21,7 @@
 #include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/resource.h>
 #include <unistd.h>
 
 #ifndef ARRAY_SIZE
@@ -190,11 +191,14 @@ static void send_from(struct test_params p, uint16_t sport, char *buf,
 	struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
 	struct sockaddr * const daddr =
 		new_loopback_sockaddr(p.send_family, p.recv_port);
-	const int fd = socket(p.send_family, p.protocol, 0);
+	const int fd = socket(p.send_family, p.protocol, 0), one = 1;
 
 	if (fd < 0)
 		error(1, errno, "failed to create send socket");
 
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+		error(1, errno, "failed to set reuseaddr");
+
 	if (bind(fd, saddr, sockaddr_size()))
 		error(1, errno, "failed to bind send socket");
 
@@ -433,6 +437,21 @@ void enable_fastopen(void)
 	}
 }
 
+static struct rlimit rlim_old, rlim_new;
+
+static  __attribute__((constructor)) void main_ctor(void)
+{
+	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+	rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+	rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+	setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
 int main(void)
 {
 	fprintf(stderr, "---- IPv4 UDP ----\n");
-- 
cgit v1.2.3


From 198ee8e17502da2634f7366395db1d77630e0219 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:10 +0100
Subject: selftests/x86: Fix vDSO selftest segfault for vsyscall=none

The vDSO selftest tries to execute a vsyscall unconditionally, even if it
is not present on the test system (e.g. if booted with vsyscall=none or
with CONFIG_LEGACY_VSYSCALL_NONE=y set. Fix this by copying (and tweaking)
the vsyscall check from test_vsyscall.c

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-3-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_vdso.c | 50 ++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 29973cde06d3..558c8207e7b9 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -28,18 +28,52 @@
 
 int nerrs = 0;
 
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
 #ifdef __x86_64__
-# define VSYS(x) (x)
+	FILE *maps;
+	char line[128];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+		return NULL;
+
+	while (fgets(line, sizeof(line), maps)) {
+		char r, x;
+		void *start, *end;
+		char name[128];
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		/* assume entries are OK, as we test vDSO here not vsyscall */
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("Warning: failed to find vsyscall getcpu\n");
+		return NULL;
+	}
+	return (void *) (0xffffffffff600800);
 #else
-# define VSYS(x) 0
+	return NULL;
 #endif
+}
 
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
-getcpu_t vdso_getcpu;
 
-void fill_function_pointers()
+static void fill_function_pointers()
 {
 	void *vdso = dlopen("linux-vdso.so.1",
 			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
@@ -54,6 +88,8 @@ void fill_function_pointers()
 	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
 	if (!vdso_getcpu)
 		printf("Warning: failed to find getcpu in vDSO\n");
+
+	vgetcpu = (getcpu_t) vsyscall_getcpu();
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
-- 
cgit v1.2.3


From d8e92de8ef952bed88c56c7a44c02d8dcae0984e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 21:59:24 +0100
Subject: selftests/x86: Clean up and document sscanf() usage

Replace a couple of magically connected buffer length literal constants with
a common definition that makes their relationship obvious. Also document
why our sscanf() usage is safe.

No intended functional changes.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211205924.GA23210@light.dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_vdso.c     | 11 ++++++++---
 tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 558c8207e7b9..235259011704 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -26,6 +26,9 @@
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 int nerrs = 0;
 
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
@@ -37,17 +40,19 @@ static void *vsyscall_getcpu(void)
 {
 #ifdef __x86_64__
 	FILE *maps;
-	char line[128];
+	char line[MAPS_LINE_LEN];
 	bool found = false;
 
 	maps = fopen("/proc/self/maps", "r");
 	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
 		return NULL;
 
-	while (fgets(line, sizeof(line), maps)) {
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
 		char r, x;
 		void *start, *end;
-		char name[128];
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
 		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
 			   &start, &end, &r, &x, name) != 5)
 			continue;
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 7a744fa7b786..be81621446f0 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -33,6 +33,9 @@
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -98,7 +101,7 @@ static int init_vsys(void)
 #ifdef __x86_64__
 	int nerrs = 0;
 	FILE *maps;
-	char line[128];
+	char line[MAPS_LINE_LEN];
 	bool found = false;
 
 	maps = fopen("/proc/self/maps", "r");
@@ -108,10 +111,12 @@ static int init_vsys(void)
 		return 0;
 	}
 
-	while (fgets(line, sizeof(line), maps)) {
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
 		char r, x;
 		void *start, *end;
-		char name[128];
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
 		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
 			   &start, &end, &r, &x, name) != 5)
 			continue;
-- 
cgit v1.2.3


From ce676638fe7b284132a7d7d5e7e7ad81bab9947e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 13 Feb 2018 08:26:17 +0100
Subject: selftests/x86/pkeys: Remove unused functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also gets rid of two build warnings:

  protection_keys.c: In function ‘dumpit’:
  protection_keys.c:419:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result]
     write(1, buf, nr_read);
     ^~~~~~~~~~~~~~~~~~~~~~

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/protection_keys.c | 28 ---------------------------
 1 file changed, 28 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bc1b0735bb50..f15aa5a76fe3 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -393,34 +393,6 @@ pid_t fork_lazy_child(void)
 	return forkret;
 }
 
-void davecmp(void *_a, void *_b, int len)
-{
-	int i;
-	unsigned long *a = _a;
-	unsigned long *b = _b;
-
-	for (i = 0; i < len / sizeof(*a); i++) {
-		if (a[i] == b[i])
-			continue;
-
-		dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
-	}
-}
-
-void dumpit(char *f)
-{
-	int fd = open(f, O_RDONLY);
-	char buf[100];
-	int nr_read;
-
-	dprintf2("maps fd: %d\n", fd);
-	do {
-		nr_read = read(fd, &buf[0], sizeof(buf));
-		write(1, buf, nr_read);
-	} while (nr_read > 0);
-	close(fd);
-}
-
 #define PKEY_DISABLE_ACCESS    0x1
 #define PKEY_DISABLE_WRITE     0x2
 
-- 
cgit v1.2.3


From 7f95122067ab26fb8344b2a9de64ffbd0fea0010 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:09 +0100
Subject: selftests/x86: Fix build bug caused by the 5lvl test which has been
 moved to the VM directory

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Fixes: 235266b8e11c "selftests/vm: move 128TB mmap boundary test to generic directory"
Link: http://lkml.kernel.org/r/20180211111013.16888-2-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d4f10ac2af2..91fbfa8fdc15 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
-- 
cgit v1.2.3


From 2cbc0d66de0480449c75636f55697c7ff3af61fc Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:11 +0100
Subject: selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c

On 64-bit builds, we should not rely on "int $0x80" working (it only does if
CONFIG_IA32_EMULATION=y is enabled).

Without this patch, the move test may succeed, but the "int $0x80" causes
a segfault, resulting in a false negative output of this self-test.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_mremap_vdso.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index bf0d687c7db7..64f11c8d9b76 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp)
 			vdso_size += PAGE_SIZE;
 		}
 
+#ifdef __i386__
 		/* Glibc is likely to explode now - exit with raw syscall */
 		asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+		syscall(SYS_exit, ret);
+#endif
 	} else {
 		int status;
 
-- 
cgit v1.2.3


From ecdf06e1ea5376bba03c155751f6869d3dfaa210 Mon Sep 17 00:00:00 2001
From: Harish <harish@linux.vnet.ibm.com>
Date: Tue, 13 Feb 2018 12:02:55 +0530
Subject: selftests/powerpc: Fix to use ucontext_t instead of struct ucontext
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With glibc 2.26 'struct ucontext' is removed to improve POSIX
compliance, which breaks powerpc/alignment_handler selftest. Fix the
test by using ucontext_t. Tested on ppc, works with older glibc
versions as well.

Fixes the following:
  alignment_handler.c: In function ‘sighandler’:
  alignment_handler.c:68:5: error: dereferencing pointer to incomplete type ‘struct ucontext’
    ucp->uc_mcontext.gp_regs[PT_NIP] += 4;

Signed-off-by: Harish <harish@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/alignment/alignment_handler.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 39fd362415cf..0f2698f9fd6d 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -57,7 +57,7 @@ volatile int gotsig;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
-	struct ucontext *ucp = ctx;
+	ucontext_t *ucp = ctx;
 
 	if (!testing) {
 		signal(sig, SIG_DFL);
-- 
cgit v1.2.3


From 4105c69703cdeba76f384b901712c9397b04e9c2 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 13 Feb 2018 09:13:21 +0100
Subject: selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c

On 64-bit builds, we should not rely on "int $0x80" working (it only does if
CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80"
test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build
this test only if we can also build 32-bit binaries (which should be a
good approximation for that).

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-5-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile              | 2 ++
 tools/testing/selftests/x86/single_step_syscall.c | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 91fbfa8fdc15..73b8ef665c98 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -30,11 +30,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
 TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
 endif
 
 all_32: $(BINARIES_32)
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index a48da95c18fd..ddfdd635de16 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -119,7 +119,9 @@ static void check_result(void)
 
 int main()
 {
+#ifdef CAN_BUILD_32
 	int tmp;
+#endif
 
 	sethandler(SIGTRAP, sigtrap, 0);
 
@@ -139,12 +141,13 @@ int main()
 		      : : "c" (post_nop) : "r11");
 	check_result();
 #endif
-
+#ifdef CAN_BUILD_32
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
 	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
 			: INT80_CLOBBERS);
 	check_result();
+#endif
 
 	/*
 	 * This test is particularly interesting if fast syscalls use
-- 
cgit v1.2.3


From 9279ddf23ce78ff2676e8e8e19fec0f022c26d04 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 13 Feb 2018 09:15:19 +0100
Subject: selftests/x86: Disable tests requiring 32-bit support on pure 64-bit
 systems

The ldt_gdt and ptrace_syscall selftests, even in their 64-bit variant, use
hard-coded 32-bit syscall numbers and call "int $0x80".

This will fail on 64-bit systems with CONFIG_IA32_EMULATION=y disabled.

Therefore, do not build these tests if we cannot build 32-bit binaries
(which should be a good approximation for CONFIG_IA32_EMULATION=y being enabled).

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-6-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 73b8ef665c98..aa6e2d7f6a1f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,16 +5,26 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
 			protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
 
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
-- 
cgit v1.2.3


From fe24e27128252c230a34a6c628da2bf1676781ea Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 8 Feb 2018 17:09:25 -0600
Subject: objtool: Fix segfault in ignore_unreachable_insn()

Peter Zijlstra's patch for converting WARN() to use UD2 triggered a
bunch of false "unreachable instruction" warnings, which then triggered
a seg fault in ignore_unreachable_insn().

The seg fault happened when it tried to dereference a NULL 'insn->func'
pointer.  Thanks to static_cpu_has(), some functions can jump to a
non-function area in the .altinstr_aux section.  That breaks
ignore_unreachable_insn()'s assumption that it's always inside the
original function.

Make sure ignore_unreachable_insn() only follows jumps within the
current function.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kbuild test robot <fengguang.wu@intel.com>
Link: http://lkml.kernel.org/r/bace77a60d5af9b45eddb8f8fb9c776c8de657ef.1518130694.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2e458eb45586..c7fb5c2392ee 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1935,13 +1935,19 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
 			return true;
 
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			if (insn->jump_dest &&
+			    insn->jump_dest->func == insn->func) {
+				insn = insn->jump_dest;
+				continue;
+			}
+
+			break;
 		}
 
 		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
 			break;
+
 		insn = list_next_entry(insn, list);
 	}
 
-- 
cgit v1.2.3


From 961888b1d76d84efc66a8f5604b06ac12ac2f978 Mon Sep 17 00:00:00 2001
From: Rui Wang <rui.y.wang@intel.com>
Date: Mon, 18 Dec 2017 16:34:10 +0800
Subject: selftests/x86/mpx: Fix incorrect bounds with old _sigfault

For distributions with old userspace header files, the _sigfault
structure is different. mpx-mini-test fails with the following
error:

  [root@Purley]# mpx-mini-test_64 tabletest
  XSAVE is supported by HW & OS
  XSAVE processor supported state mask: 0x2ff
  XSAVE OS supported state mask: 0x2ff
   BNDREGS: size: 64 user: 1 supervisor: 0 aligned: 0
    BNDCSR: size: 64 user: 1 supervisor: 0 aligned: 0
  starting mpx bounds table test
  ERROR: siginfo bounds do not match shadow bounds for register 0

Fix it by using the correct offset of _lower/_upper in _sigfault.
RHEL needs this patch to work.

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave.hansen@linux.intel.com
Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test")
Link: http://lkml.kernel.org/r/1513586050-1641-1-git-send-email-rui.y.wang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/mpx-mini-test.c | 32 +++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index ec0f6b45ce8b..9c0325e1ea68 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si)
 	return si->si_upper;
 }
 #else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+        struct {
+            void *si_addr;
+	} _sigfault;
+
+new _sigfault:
+	struct {
+		void __user *_addr;
+		int _trapno;
+		short _addr_lsb;
+		union {
+			struct {
+				void __user *_lower;
+				void __user *_upper;
+			} _addr_bnd;
+			__u32 _pkey;
+		};
+	} _sigfault;
+ *
+ */
+
 static inline void **__si_bounds_hack(siginfo_t *si)
 {
 	void *sigfault = &si->_sifields._sigfault;
 	void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
-	void **__si_lower = end_sigfault;
+	int *trapno = (int*)end_sigfault;
+	/* skip _trapno and _addr_lsb */
+	void **__si_lower = (void**)(trapno + 2);
 
 	return __si_lower;
 }
@@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si)
 
 static inline void *__si_bounds_upper(siginfo_t *si)
 {
-	return (*__si_bounds_hack(si)) + sizeof(void *);
+	return *(__si_bounds_hack(si) + 1);
 }
 #endif
 
-- 
cgit v1.2.3


From 0b7c1528fb741803396da68a9d8d285ff7db731c Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Tue, 30 Jan 2018 22:28:13 -0500
Subject: perf vendor events aarch64: Add JSON metrics for ARM Cortex-A53
 Processor

Add JSON metrics for ARM Cortex-A53 Processor.

Unlike the Intel processors there isn't a script that automatically
generated these files. The patch was manually generated from the
documentation and the previous oprofile ARM Cortex ac53 event file patch
I made.

The relevant documentation is in the "12.9 Events" section of the ARM
Cortex A53 MPCore Processor Revision: r0p4 Technical Reference Manual.

The ARM Cortex A53 manual is available at:

  http://infocenter.arm.com/help/topic/com.arm.doc.ddi0500g/DDI0500G_cortex_a53_trm.pdf

Use that to look for additional information about the events.

Signed-off-by: William Cohen <wcohen@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180131032813.9564-1-wcohen@redhat.com
[ Added references provided by William Cohen ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../pmu-events/arch/arm64/cortex-a53/branch.json   | 27 +++++++++++
 .../perf/pmu-events/arch/arm64/cortex-a53/bus.json | 22 +++++++++
 .../pmu-events/arch/arm64/cortex-a53/cache.json    | 27 +++++++++++
 .../pmu-events/arch/arm64/cortex-a53/memory.json   | 22 +++++++++
 .../pmu-events/arch/arm64/cortex-a53/other.json    | 32 +++++++++++++
 .../pmu-events/arch/arm64/cortex-a53/pipeline.json | 52 ++++++++++++++++++++++
 tools/perf/pmu-events/arch/arm64/mapfile.csv       |  1 +
 7 files changed, 183 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
new file mode 100644
index 000000000000..3b6208763e50
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
@@ -0,0 +1,27 @@
+[
+  {,
+    "EventCode": "0x7A",
+    "EventName": "BR_INDIRECT_SPEC",
+    "BriefDescription": "Branch speculatively executed - Indirect branch"
+  },
+  {,
+    "EventCode": "0xC9",
+    "EventName": "BR_COND",
+    "BriefDescription": "Conditional branch executed"
+  },
+  {,
+    "EventCode": "0xCA",
+    "EventName": "BR_INDIRECT_MISPRED",
+    "BriefDescription": "Indirect branch mispredicted"
+  },
+  {,
+    "EventCode": "0xCB",
+    "EventName": "BR_INDIRECT_MISPRED_ADDR",
+    "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+  },
+  {,
+    "EventCode": "0xCC",
+    "EventName": "BR_COND_MISPRED",
+    "BriefDescription": "Conditional branch mispredicted"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
@@ -0,0 +1,22 @@
+[
+  {,
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {,
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {,
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {,
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
new file mode 100644
index 000000000000..11baad6344b9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
@@ -0,0 +1,27 @@
+[
+  {,
+    "EventCode": "0xC2",
+    "EventName": "PREFETCH_LINEFILL",
+    "BriefDescription": "Linefill because of prefetch"
+  },
+  {,
+    "EventCode": "0xC3",
+    "EventName": "PREFETCH_LINEFILL_DROP",
+    "BriefDescription": "Instruction Cache Throttle occurred"
+  },
+  {,
+    "EventCode": "0xC4",
+    "EventName": "READ_ALLOC_ENTER",
+    "BriefDescription": "Entering read allocate mode"
+  },
+  {,
+    "EventCode": "0xC5",
+    "EventName": "READ_ALLOC",
+    "BriefDescription": "Read allocate mode"
+  },
+  {,
+    "EventCode": "0xC8",
+    "EventName": "EXT_SNOOP",
+    "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
@@ -0,0 +1,22 @@
+[
+  {,
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {,
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {,
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {,
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
new file mode 100644
index 000000000000..73a22402d003
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
@@ -0,0 +1,32 @@
+[
+  {,
+    "EventCode": "0x86",
+    "EventName": "EXC_IRQ",
+    "BriefDescription": "Exception taken, IRQ"
+  },
+  {,
+    "EventCode": "0x87",
+    "EventName": "EXC_FIQ",
+    "BriefDescription": "Exception taken, FIQ"
+  },
+  {,
+    "EventCode": "0xC6",
+    "EventName": "PRE_DECODE_ERR",
+    "BriefDescription": "Pre-decode error"
+  },
+  {,
+    "EventCode": "0xD0",
+    "EventName": "L1I_CACHE_ERR",
+    "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+  },
+  {,
+    "EventCode": "0xD1",
+    "EventName": "L1D_CACHE_ERR",
+    "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+  },
+  {,
+    "EventCode": "0xD2",
+    "EventName": "TLB_ERR",
+    "BriefDescription": "TLB memory error"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
new file mode 100644
index 000000000000..3149fb90555a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
+[
+  {,
+    "EventCode": "0xC7",
+    "EventName": "STALL_SB_FULL",
+    "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+  },
+  {,
+    "EventCode": "0xE0",
+    "EventName": "OTHER_IQ_DEP_STALL",
+    "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+  },
+  {,
+    "EventCode": "0xE1",
+    "EventName": "IC_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+  },
+  {,
+    "EventCode": "0xE2",
+    "EventName": "IUTLB_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+  },
+  {,
+    "EventCode": "0xE3",
+    "EventName": "DECODE_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+  },
+  {,
+    "EventCode": "0xE4",
+    "EventName": "OTHER_INTERLOCK_STALL",
+    "BriefDescription": "Cycles there is an interlock other than  Advanced SIMD/Floating-point instructions or load/store instruction"
+  },
+  {,
+    "EventCode": "0xE5",
+    "EventName": "AGU_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+  },
+  {,
+    "EventCode": "0xE6",
+    "EventName": "SIMD_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+  },
+  {,
+    "EventCode": "0xE7",
+    "EventName": "LD_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+  },
+  {,
+    "EventCode": "0xE8",
+    "EventName": "ST_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 219d6756134e..e61c9ca6cf9e 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -13,3 +13,4 @@
 #
 #Family-model,Version,Filename,EventType
 0x00000000420f5160,v1,cavium,core
+0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
-- 
cgit v1.2.3


From 6888ff66c44ffa3077ed69e978902d0ff4b84ae1 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:16 -0800
Subject: perf evlist: Remove stale mmap read for backward

perf_evlist__mmap_read_catchup() and perf_evlist__mmap_read_backward()
are only for overwrite mode.

But they read the evlist->mmap buffer which is for non-overwrite mode.

It did not bring any serious problem yet, because there is no one use
it.

Remove the unused interfaces.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Wang Nan <wangnan0@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1516310792-208685-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 17 -----------------
 tools/perf/util/evlist.h |  4 ----
 2 files changed, 21 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ac35cd214feb..e5fc14e53c05 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
 	return perf_mmap__read_forward(md);
 }
 
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
-{
-	struct perf_mmap *md = &evlist->mmap[idx];
-
-	/*
-	 * No need to check messup for backward ring buffer:
-	 * We can always read arbitrary long data from a backward
-	 * ring buffer unless we forget to pause it before reading.
-	 */
-	return perf_mmap__read_backward(md);
-}
-
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	return perf_evlist__mmap_read_forward(evlist, idx);
 }
 
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
-{
-	perf_mmap__read_catchup(&evlist->mmap[idx]);
-}
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 {
 	perf_mmap__consume(&evlist->mmap[idx], false);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 75f8e0ad5d76..336b838e6957 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
 						 int idx);
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
-						  int idx);
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
-- 
cgit v1.2.3


From dc6c35c679e96987dc83a003f30bc2cc33c84c00 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:17 -0800
Subject: perf mmap: Recalculate size for overwrite mode

In perf_mmap__push(), the 'size' need to be recalculated, otherwise the
invalid data might be pushed to the record in overwrite mode.

The issue is introduced by commit 7fb4b407a124 ("perf mmap: Don't
discard prev in backward mode").

When the ring buffer is full in overwrite mode, backward_rb_find_range()
will be called to recalculate the 'start' and 'end'. The 'size' needs to
be recalculated accordingly.

Unconditionally recalculate the 'size', not just for full ring buffer in
overwrite mode. Because:

- There is no harmful to recalculate the 'size' for other cases.
- The code of calculating 'start' and 'end' will be factored out later.
  The new function does not need to return 'size'.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 7fb4b407a124 ("perf mmap: Don't discard prev in backward mode")
Link: http://lkml.kernel.org/r/1516310792-208685-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 05076e683938..97cf4fab564b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -302,6 +302,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 			return -1;
 	}
 
+	size = end - start;
+
 	if ((start & md->mask) + size != (end & md->mask)) {
 		buf = &data[start & md->mask];
 		size = md->mask + 1 - (start & md->mask);
-- 
cgit v1.2.3


From f92c8cbe597a5a2ccec702dff824f3fe0f3623eb Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:18 -0800
Subject: perf mmap: Cleanup perf_mmap__push()

The first assignment for 'start' and 'end' is redundant.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-4-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 97cf4fab564b..fbbbe87f0308 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -272,7 +272,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 {
 	u64 head = perf_mmap__read_head(md);
 	u64 old = md->prev;
-	u64 end = head, start = old;
+	u64 end, start;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
-- 
cgit v1.2.3


From 8872481bd04850b19e053dc579de5a11b83b16fc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:19 -0800
Subject: perf mmap: Introduce perf_mmap__read_init()

The new function perf_mmap__read_init() is factored out from
perf_mmap__push().

It is to calculate the 'start' and 'end' of the available data in
ringbuffer.

No functional change.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-5-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 37 +++++++++++++++++++++++++++----------
 tools/perf/util/mmap.h |  2 ++
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index fbbbe87f0308..c19a4e640e8e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -267,24 +267,24 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
 	return -1;
 }
 
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
-		    void *to, int push(void *to, void *buf, size_t size))
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+			 u64 *startp, u64 *endp)
 {
 	u64 head = perf_mmap__read_head(md);
 	u64 old = md->prev;
-	u64 end, start;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
-	void *buf;
-	int rc = 0;
 
-	start = overwrite ? head : old;
-	end = overwrite ? old : head;
+	*startp = overwrite ? head : old;
+	*endp = overwrite ? old : head;
 
-	if (start == end)
+	if (*startp == *endp)
 		return 0;
 
-	size = end - start;
+	size = *endp - *startp;
 	if (size > (unsigned long)(md->mask) + 1) {
 		if (!overwrite) {
 			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
@@ -298,10 +298,27 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 		 * Backward ring buffer is full. We still have a chance to read
 		 * most of data from it.
 		 */
-		if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
+		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
 			return -1;
 	}
 
+	return 1;
+}
+
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
+		    void *to, int push(void *to, void *buf, size_t size))
+{
+	u64 head = perf_mmap__read_head(md);
+	u64 end, start;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int rc = 0;
+
+	rc = perf_mmap__read_init(md, overwrite, &start, &end);
+	if (rc < 1)
+		return rc;
+
 	size = end - start;
 
 	if ((start & md->mask) + size != (end & md->mask)) {
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e43d7b55a55f..9ab2b48df65b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -94,4 +94,6 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+			 u64 *startp, u64 *endp);
 #endif /*__PERF_MMAP_H */
-- 
cgit v1.2.3


From 189f2cc91f9f2efef5d5f4dde43684c01b5f6f2f Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:20 -0800
Subject: perf mmap: Add new return value logic for perf_mmap__read_init()

Improve the readability by using meaningful enum (-EAGAIN, -EINVAL and
0) to replace the three returning states (0, -1 and 1).

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-6-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index c19a4e640e8e..38fa69dc635e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -282,7 +282,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 	*endp = overwrite ? old : head;
 
 	if (*startp == *endp)
-		return 0;
+		return -EAGAIN;
 
 	size = *endp - *startp;
 	if (size > (unsigned long)(md->mask) + 1) {
@@ -291,7 +291,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 
 			md->prev = head;
 			perf_mmap__consume(md, overwrite);
-			return 0;
+			return -EAGAIN;
 		}
 
 		/*
@@ -299,10 +299,10 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 		 * most of data from it.
 		 */
 		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
-			return -1;
+			return -EINVAL;
 	}
 
-	return 1;
+	return 0;
 }
 
 int perf_mmap__push(struct perf_mmap *md, bool overwrite,
@@ -316,8 +316,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 	int rc = 0;
 
 	rc = perf_mmap__read_init(md, overwrite, &start, &end);
-	if (rc < 1)
-		return rc;
+	if (rc < 0)
+		return (rc == -EAGAIN) ? 0 : -1;
 
 	size = end - start;
 
-- 
cgit v1.2.3


From b4b036b4c76341a5034e872aca3727c4988a7304 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:21 -0800
Subject: perf mmap: Discard 'prev' in perf_mmap__read()

The 'start' and 'prev' variables are duplicates in perf_mmap__read().

Use 'map->prev' to replace 'start' in perf_mmap__read_*().

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-7-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 38fa69dc635e..125bfda9d037 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
 
 /* When check_messup is true, 'end' must points to a good entry */
 static union perf_event *perf_mmap__read(struct perf_mmap *map,
-					 u64 start, u64 end, u64 *prev)
+					 u64 *startp, u64 end)
 {
 	unsigned char *data = map->base + page_size;
 	union perf_event *event = NULL;
-	int diff = end - start;
+	int diff = end - *startp;
 
 	if (diff >= (int)sizeof(event->header)) {
 		size_t size;
 
-		event = (union perf_event *)&data[start & map->mask];
+		event = (union perf_event *)&data[*startp & map->mask];
 		size = event->header.size;
 
-		if (size < sizeof(event->header) || diff < (int)size) {
-			event = NULL;
-			goto broken_event;
-		}
+		if (size < sizeof(event->header) || diff < (int)size)
+			return NULL;
 
 		/*
 		 * Event straddles the mmap boundary -- header should always
 		 * be inside due to u64 alignment of output.
 		 */
-		if ((start & map->mask) + size != ((start + size) & map->mask)) {
-			unsigned int offset = start;
+		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+			unsigned int offset = *startp;
 			unsigned int len = min(sizeof(*event), size), cpy;
 			void *dst = map->event_copy;
 
@@ -59,20 +57,15 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 			event = (union perf_event *)map->event_copy;
 		}
 
-		start += size;
+		*startp += size;
 	}
 
-broken_event:
-	if (prev)
-		*prev = start;
-
 	return event;
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 {
 	u64 head;
-	u64 old = map->prev;
 
 	/*
 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -82,13 +75,12 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 
 	head = perf_mmap__read_head(map);
 
-	return perf_mmap__read(map, old, head, &map->prev);
+	return perf_mmap__read(map, &map->prev, head);
 }
 
 union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 {
 	u64 head, end;
-	u64 start = map->prev;
 
 	/*
 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -118,7 +110,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 	else
 		end = head + map->mask + 1;
 
-	return perf_mmap__read(map, start, end, &map->prev);
+	return perf_mmap__read(map, &map->prev, end);
 }
 
 void perf_mmap__read_catchup(struct perf_mmap *map)
-- 
cgit v1.2.3


From ee023de05f35484691f7d9e5c1f92195ac4d64d2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:22 -0800
Subject: perf mmap: Introduce perf_mmap__read_done()

The direction of overwrite mode is backward. The last perf_mmap__read()
will set tail to map->prev. Need to correct the map->prev to head which
is the end of next read.

It will be used later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-8-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 11 +++++++++++
 tools/perf/util/mmap.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 125bfda9d037..4f59eaefc706 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -338,3 +338,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 out:
 	return rc;
 }
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->prev.
+ * Need to correct the map->prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+	map->prev = perf_mmap__read_head(map);
+}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 9ab2b48df65b..95549d4af943 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -96,4 +96,5 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
 int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 			 u64 *startp, u64 *endp);
+void perf_mmap__read_done(struct perf_mmap *map);
 #endif /*__PERF_MMAP_H */
-- 
cgit v1.2.3


From 7bb45972952db9298fe5cc440160dcad1a66bfbc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:23 -0800
Subject: perf mmap: Introduce perf_mmap__read_event()

Except for 'perf record', the other perf tools read events one by one
from the ring buffer using perf_mmap__read_forward(). But it only
supports non-overwrite mode.

Introduce perf_mmap__read_event() to support both non-overwrite and
overwrite mode.

Usage:
perf_mmap__read_init()
while(event = perf_mmap__read_event()) {
        //process the event
        perf_mmap__consume()
}
perf_mmap__read_done()

It cannot use perf_mmap__read_backward(). Because it always reads the
stale buffer which is already processed. Furthermore, the forward and
backward concepts have been removed. The perf_mmap__read_backward() will
be replaced and discarded later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-9-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 39 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/mmap.h |  4 ++++
 2 files changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 4f59eaefc706..f804926778b7 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -113,6 +113,45 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 	return perf_mmap__read(map, &map->prev, end);
 }
 
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ *	//process the event
+ *	perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+					bool overwrite,
+					u64 *startp, u64 end)
+{
+	union perf_event *event;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	if (startp == NULL)
+		return NULL;
+
+	/* non-overwirte doesn't pause the ringbuffer */
+	if (!overwrite)
+		end = perf_mmap__read_head(map);
+
+	event = perf_mmap__read(map, startp, end);
+
+	if (!overwrite)
+		map->prev = *startp;
+
+	return event;
+}
+
 void perf_mmap__read_catchup(struct perf_mmap *map)
 {
 	u64 head;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 95549d4af943..28718543dd42 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -89,6 +89,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
 
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+					bool overwrite,
+					u64 *startp, u64 end);
+
 int perf_mmap__push(struct perf_mmap *md, bool backward,
 		    void *to, int push(void *to, void *buf, size_t size));
 
-- 
cgit v1.2.3


From 600a7cfe88de2c6e44e23d61dd721b996b790eb2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:24 -0800
Subject: perf test: Update mmap read functions for backward-ring-buffer test

Use the new perf_mmap__read_* interfaces for overwrite ringbuffer test.

Commiter notes:

Testing:

  [root@seventh ~]# perf test -v backward
  48: Read backward ring buffer                             :
  --- start ---
  test child forked, pid 8309
  Using CPUID GenuineIntel-6-9E
  mmap size 1052672B
  mmap size 8192B
  Finished reading overwrite ring buffer: rewind
  test child finished with 0
  ---- end ----
  Read backward ring buffer: Ok
  [root@seventh ~]#

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-10-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/backward-ring-buffer.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 4035d43523c3..e0b1b414d466 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 	int i;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *map = &evlist->overwrite_mmap[i];
 		union perf_event *event;
+		u64 start, end;
 
-		perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
-		while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
+		perf_mmap__read_init(map, true, &start, &end);
+		while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
 			const u32 type = event->header.type;
 
 			switch (type) {
@@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 				return TEST_FAIL;
 			}
 		}
+		perf_mmap__read_done(map);
 	}
 	return TEST_OK;
 }
-- 
cgit v1.2.3


From 3effc2f165a842d640873e29d4c5cc1650143aef Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:25 -0800
Subject: perf mmap: Discard legacy interface for mmap read

Discards perf_mmap__read_backward() and perf_mmap__read_catchup(). No
tools use them.

There are tools still use perf_mmap__read_forward(). Keep it, but add
comments to point to the new interface for future use.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-11-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 50 ++++----------------------------------------------
 tools/perf/util/mmap.h |  3 ---
 2 files changed, 4 insertions(+), 49 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index f804926778b7..91531a7c8fbf 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -63,6 +63,10 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 	return event;
 }
 
+/*
+ * legacy interface for mmap read.
+ * Don't use it. Use perf_mmap__read_event().
+ */
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 {
 	u64 head;
@@ -78,41 +82,6 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 	return perf_mmap__read(map, &map->prev, head);
 }
 
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
-{
-	u64 head, end;
-
-	/*
-	 * Check if event was unmapped due to a POLLHUP/POLLERR.
-	 */
-	if (!refcount_read(&map->refcnt))
-		return NULL;
-
-	head = perf_mmap__read_head(map);
-	if (!head)
-		return NULL;
-
-	/*
-	 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
-	 * it each time when kernel writes to it, so in fact 'head' is
-	 * negative. 'end' pointer is made manually by adding the size of
-	 * the ring buffer to 'head' pointer, means the validate data can
-	 * read is the whole ring buffer. If 'end' is positive, the ring
-	 * buffer has not fully filled, so we must adjust 'end' to 0.
-	 *
-	 * However, since both 'head' and 'end' is unsigned, we can't
-	 * simply compare 'end' against 0. Here we compare '-head' and
-	 * the size of the ring buffer, where -head is the number of bytes
-	 * kernel write to the ring buffer.
-	 */
-	if (-head < (u64)(map->mask + 1))
-		end = 0;
-	else
-		end = head + map->mask + 1;
-
-	return perf_mmap__read(map, &map->prev, end);
-}
-
 /*
  * Read event from ring buffer one by one.
  * Return one event for each call.
@@ -152,17 +121,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map,
 	return event;
 }
 
-void perf_mmap__read_catchup(struct perf_mmap *map)
-{
-	u64 head;
-
-	if (!refcount_read(&map->refcnt))
-		return;
-
-	head = perf_mmap__read_head(map);
-	map->prev = head;
-}
-
 static bool perf_mmap__empty(struct perf_mmap *map)
 {
 	return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 28718543dd42..ec7d3a24e276 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map);
 
 void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
 
-void perf_mmap__read_catchup(struct perf_mmap *md);
-
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
@@ -87,7 +85,6 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
 
 union perf_event *perf_mmap__read_event(struct perf_mmap *map,
 					bool overwrite,
-- 
cgit v1.2.3


From 63878a53cedc3df31bd4ba8740a49fa0fc116ac6 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:26 -0800
Subject: perf top: Check per-event overwrite term

Per-event overwrite term is not forbidden in 'perf top', which can bring
problems. Because 'perf top' only support non-overwrite mode now.

Add new rules and check regarding to overwrite term for 'perf top'.
- All events either have same per-event term or don't have per-event
  mode setting. Otherwise, it will error out.
- Per-event overwrite term should be consistent as opts->overwrite.
  If not, updating the opts->overwrite according to per-event term.

Make it possible to support either non-overwrite or overwrite mode.
The overwrite mode is forbidden now, which will be removed when the
overwrite mode is supported later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-12-git-send-email-kan.liang@intel.com
[ Renamed perf_top_overwrite_check to perf_top__overwrite_check, to follow existing convention ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c6ccda52117d..17783798924a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -881,6 +881,68 @@ static void perf_top__mmap_read(struct perf_top *top)
 		perf_top__mmap_read_idx(top, i);
 }
 
+/*
+ * Check per-event overwrite term.
+ * perf top should support consistent term for all events.
+ * - All events don't have per-event term
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ *   Nothing change, return 0.
+ * - All events have same per-event term
+ *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
+ *   Using the per-event setting to replace the opts->overwrite if
+ *   they are different, then return 0.
+ * - Events have different per-event term
+ *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ * - Some of the event set per-event term, but some not.
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel_config_term *term;
+	struct list_head *config_terms;
+	struct perf_evsel *evsel;
+	int set, overwrite = -1;
+
+	evlist__for_each_entry(evlist, evsel) {
+		set = -1;
+		config_terms = &evsel->config_terms;
+		list_for_each_entry(term, config_terms, list) {
+			if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+				set = term->val.overwrite ? 1 : 0;
+		}
+
+		/* no term for current and previous event (likely) */
+		if ((overwrite < 0) && (set < 0))
+			continue;
+
+		/* has term for both current and previous event, compare */
+		if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+			return -1;
+
+		/* no term for current event but has term for previous one */
+		if ((overwrite >= 0) && (set < 0))
+			return -1;
+
+		/* has term for current event */
+		if ((overwrite < 0) && (set >= 0)) {
+			/* if it's first event, set overwrite */
+			if (evsel == perf_evlist__first(evlist))
+				overwrite = set;
+			else
+				return -1;
+		}
+	}
+
+	if ((overwrite >= 0) && (opts->overwrite != overwrite))
+		opts->overwrite = overwrite;
+
+	return 0;
+}
+
 static int perf_top__start_counters(struct perf_top *top)
 {
 	char msg[BUFSIZ];
@@ -888,6 +950,17 @@ static int perf_top__start_counters(struct perf_top *top)
 	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;
 
+	if (perf_top__overwrite_check(top)) {
+		ui__error("perf top only support consistent per-event "
+			  "overwrite setting for all events\n");
+		goto out_err;
+	}
+
+	if (opts->overwrite) {
+		ui__error("not support overwrite mode yet\n");
+		goto out_err;
+	}
+
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, counter) {
-- 
cgit v1.2.3


From 9a831b3a32c5daf5d7cc672334d51930f78e4ea3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 2 Feb 2018 11:27:25 -0300
Subject: perf evsel: Expose the perf_missing_features struct

As tools may need to adjust to missing features, as 'perf top' will, in
the next csets, to cope with a missing 'write_backward' feature.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-jelngl9q1ooaizvkcput9tic@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 12 +-----------
 tools/perf/util/evsel.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ff359c9ece2e..ef351688b797 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -41,17 +41,7 @@
 
 #include "sane_ctype.h"
 
-static struct {
-	bool sample_id_all;
-	bool exclude_guest;
-	bool mmap2;
-	bool cloexec;
-	bool clockid;
-	bool clockid_wrong;
-	bool lbr_flags;
-	bool write_backward;
-	bool group_read;
-} perf_missing_features;
+struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 846e41644525..a7487c6d1866 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -149,6 +149,20 @@ union u64_swap {
 	u32 val32[2];
 };
 
+struct perf_missing_features {
+	bool sample_id_all;
+	bool exclude_guest;
+	bool mmap2;
+	bool cloexec;
+	bool clockid;
+	bool clockid_wrong;
+	bool lbr_flags;
+	bool write_backward;
+	bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
 struct cpu_map;
 struct target;
 struct thread_map;
-- 
cgit v1.2.3


From 204721d7eabe6ee98aafce791ce3efdbc4715834 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:28 -0800
Subject: perf top: Add overwrite fall back

Switch to non-overwrite mode if kernel doesnot support overwrite
ringbuffer.

It's only effect when overwrite mode is supported.  No change to current
behavior.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-14-git-send-email-kan.liang@intel.com
[ Use perf_missing_features.write_backward instead of the non merged is_write_backward_fail() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 17783798924a..ee4bba1e282c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -943,6 +943,27 @@ static int perf_top__overwrite_check(struct perf_top *top)
 	return 0;
 }
 
+static int perf_top_overwrite_fallback(struct perf_top *top,
+				       struct perf_evsel *evsel)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel *counter;
+
+	if (!opts->overwrite)
+		return 0;
+
+	/* only fall back when first event fails */
+	if (evsel != perf_evlist__first(evlist))
+		return 0;
+
+	evlist__for_each_entry(evlist, counter)
+		counter->attr.write_backward = false;
+	opts->overwrite = false;
+	ui__warning("fall back to non-overwrite mode\n");
+	return 1;
+}
+
 static int perf_top__start_counters(struct perf_top *top)
 {
 	char msg[BUFSIZ];
@@ -967,6 +988,21 @@ static int perf_top__start_counters(struct perf_top *top)
 try_again:
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads) < 0) {
+
+			/*
+			 * Specially handle overwrite fall back.
+			 * Because perf top is the only tool which has
+			 * overwrite mode by default, support
+			 * both overwrite and non-overwrite mode, and
+			 * require consistent mode for all events.
+			 *
+			 * May move it to generic code with more tools
+			 * have similar attribute.
+			 */
+			if (perf_missing_features.write_backward &&
+			    perf_top_overwrite_fallback(top, counter))
+				goto try_again;
+
 			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
 				if (verbose > 0)
 					ui__warning("%s\n", msg);
-- 
cgit v1.2.3


From 06cc1a470ab237b991901729b125404c164f3660 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:29 -0800
Subject: perf hists browser: Add parameter to disable lost event warning

For overwrite mode, the ringbuffer will be paused. The event lost is
expected. It needs a way to notify the browser not print the warning.

It will be used later for perf top to disable lost event warning in
overwrite mode. There is no behavior change for now.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-15-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c       |  4 ++--
 tools/perf/builtin-report.c    |  3 ++-
 tools/perf/builtin-top.c       |  2 +-
 tools/perf/ui/browsers/hists.c | 38 +++++++++++++++++++++++++-------------
 tools/perf/ui/browsers/hists.h |  3 ++-
 tools/perf/util/hist.h         |  6 ++++--
 6 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0815a37fdb5..539c3d460158 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
 	c2c_browser__update_nr_entries(browser);
 
 	while (1) {
-		key = hist_browser__run(browser, "? - help");
+		key = hist_browser__run(browser, "? - help", true);
 
 		switch (key) {
 		case 's':
@@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
 	c2c_browser__update_nr_entries(browser);
 
 	while (1) {
-		key = hist_browser__run(browser, "? - help");
+		key = hist_browser__run(browser, "? - help", true);
 
 		switch (key) {
 		case 'q':
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 42a52dcc41cd..4ad5dc649716 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep)
 	case 1:
 		ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
 						    rep->min_percent,
-						    &session->header.env);
+						    &session->header.env,
+						    true);
 		/*
 		 * Usually "ret" is the last pressed key, and we only
 		 * care if the key notifies us to switch data file.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ee4bba1e282c..7def861a9ec4 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -611,7 +611,7 @@ static void *display_thread_tui(void *arg)
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      top->min_percent,
-				      &top->session->header.env);
+				      &top->session->header.env, true);
 
 	done = 1;
 	return NULL;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 68146f4620a5..6495ee55d9c3 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
 	return browser->title ? browser->title(browser, bf, size) : 0;
 }
 
-int hist_browser__run(struct hist_browser *browser, const char *help)
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event)
 {
 	int key;
 	char title[160];
@@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
 			nr_entries = hist_browser__nr_entries(browser);
 			ui_browser__update_nr_entries(&browser->b, nr_entries);
 
-			if (browser->hists->stats.nr_lost_warned !=
-			    browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+			if (warn_lost_event &&
+			    (browser->hists->stats.nr_lost_warned !=
+			    browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
 				browser->hists->stats.nr_lost_warned =
 					browser->hists->stats.nr_events[PERF_RECORD_LOST];
 				ui_browser__warn_lost_events(&browser->b);
@@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    bool left_exits,
 				    struct hist_browser_timer *hbt,
 				    float min_pcnt,
-				    struct perf_env *env)
+				    struct perf_env *env,
+				    bool warn_lost_event)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
@@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser, helpline);
+		key = hist_browser__run(browser, helpline,
+					warn_lost_event);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
@@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 
 static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 				int nr_events, const char *help,
-				struct hist_browser_timer *hbt)
+				struct hist_browser_timer *hbt,
+				bool warn_lost_event)
 {
 	struct perf_evlist *evlist = menu->b.priv;
 	struct perf_evsel *pos;
@@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 		case K_TIMER:
 			hbt->timer(hbt->arg);
 
-			if (!menu->lost_events_warned && menu->lost_events) {
+			if (!menu->lost_events_warned &&
+			    menu->lost_events &&
+			    warn_lost_event) {
 				ui_browser__warn_lost_events(&menu->b);
 				menu->lost_events_warned = true;
 			}
@@ -3224,7 +3231,8 @@ browse_hists:
 			key = perf_evsel__hists_browse(pos, nr_events, help,
 						       true, hbt,
 						       menu->min_pcnt,
-						       menu->env);
+						       menu->env,
+						       warn_lost_event);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   int nr_entries, const char *help,
 					   struct hist_browser_timer *hbt,
 					   float min_pcnt,
-					   struct perf_env *env)
+					   struct perf_env *env,
+					   bool warn_lost_event)
 {
 	struct perf_evsel *pos;
 	struct perf_evsel_menu menu = {
@@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 			menu.b.width = line_len;
 	}
 
-	return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+	return perf_evsel_menu__run(&menu, nr_entries, help,
+				    hbt, warn_lost_event);
 }
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
-				  struct perf_env *env)
+				  struct perf_env *env,
+				  bool warn_lost_event)
 {
 	int nr_entries = evlist->nr_entries;
 
@@ -3325,7 +3336,7 @@ single_entry:
 
 		return perf_evsel__hists_browse(first, nr_entries, help,
 						false, hbt, min_pcnt,
-						env);
+						env, warn_lost_event);
 	}
 
 	if (symbol_conf.event_group) {
@@ -3342,5 +3353,6 @@ single_entry:
 	}
 
 	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-					       hbt, min_pcnt, env);
+					       hbt, min_pcnt, env,
+					       warn_lost_event);
 }
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index ba431777f559..9428bee076f2 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -28,7 +28,8 @@ struct hist_browser {
 
 struct hist_browser *hist_browser__new(struct hists *hists);
 void hist_browser__delete(struct hist_browser *browser);
-int hist_browser__run(struct hist_browser *browser, const char *help);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event);
 void hist_browser__init(struct hist_browser *browser,
 			struct hists *hists);
 #endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f6630cb95eff..02721b579746 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
-				  struct perf_env *env);
+				  struct perf_env *env,
+				  bool warn_lost_event);
 int script_browse(const char *script_opt);
 #else
 static inline
@@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
 				  struct hist_browser_timer *hbt __maybe_unused,
 				  float min_pcnt __maybe_unused,
-				  struct perf_env *env __maybe_unused)
+				  struct perf_env *env __maybe_unused,
+				  bool warn_lost_event __maybe_unused)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From a1ff5b05e988ca3620027148cd61013408ea4194 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:30 -0800
Subject: perf top: Remove lost events checking

There would be some records lost in overwrite mode because of pausing
the ringbuffer. It has little impact for the accuracy of the snapshot
and could be tolerated by 'perf top'.

Remove the lost events checking.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-16-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7def861a9ec4..59653062bb48 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top)
 
 	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
-	if (hists->stats.nr_lost_warned !=
-	    hists->stats.nr_events[PERF_RECORD_LOST]) {
+	if (!top->record_opts.overwrite &&
+	    (hists->stats.nr_lost_warned !=
+	    hists->stats.nr_events[PERF_RECORD_LOST])) {
 		hists->stats.nr_lost_warned =
 			      hists->stats.nr_events[PERF_RECORD_LOST];
 		color_fprintf(stdout, PERF_COLOR_RED,
@@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg)
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      top->min_percent,
-				      &top->session->header.env, true);
+				      &top->session->header.env,
+				      !top->record_opts.overwrite);
 
 	done = 1;
 	return NULL;
-- 
cgit v1.2.3


From ebebbf082357f86cc84a4d46ce897a5750e41b7a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:31 -0800
Subject: perf top: Switch default mode to overwrite mode

perf_top__mmap_read() has a severe performance issue in the Knights
Landing/Mill platform, when monitoring heavy load systems. It costs
several minutes to finish, which is unacceptable.

Currently, 'perf top' uses the non overwrite mode. For non overwrite
mode, it tries to read everything in the ringbuffer and doesn't pause
it. Once there are lots of samples delivered persistently, the
processing time could be very long. Also, the latest samples could be
lost when the ringbuffer is full.

For overwrite mode, it takes a snapshot for the system by pausing the
ringbuffer, which could significantly reduce the processing time.  Also,
the overwrite mode always keep the latest samples.  Considering the real
time requirement for 'perf top', the overwrite mode is more suitable for
it.

Actually, 'perf top' was overwrite mode. It is changed to non overwrite
mode since commit 93fc64f14472 ("perf top: Switch to non overwrite
mode"). It's better to change it back to overwrite mode by default.

For the kernel which doesn't support overwrite mode, it will fall back
to non overwrite mode.

There would be some records lost in overwrite mode because of pausing
the ringbuffer. It has little impact for the accuracy of the snapshot
and can be tolerated.

For overwrite mode, unconditionally wait 100 ms before each snapshot. It
also reduces the overhead caused by pausing ringbuffer, especially on
light load system.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-17-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 59653062bb48..2b4914f34ed6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -809,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
 	struct perf_sample sample;
 	struct perf_evsel *evsel;
+	struct perf_mmap *md;
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
+	u64 end, start;
 	int ret;
 
-	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
-		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
+	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+	if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+		return;
+
+	while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+		ret = perf_evlist__parse_sample(evlist, event, &sample);
 		if (ret) {
 			pr_err("Can't parse sample, err = %d\n", ret);
 			goto next_event;
@@ -871,16 +879,28 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 		} else
 			++session->evlist->stats.nr_unknown_events;
 next_event:
-		perf_evlist__mmap_consume(top->evlist, idx);
+		perf_mmap__consume(md, opts->overwrite);
 	}
+
+	perf_mmap__read_done(md);
 }
 
 static void perf_top__mmap_read(struct perf_top *top)
 {
+	bool overwrite = top->record_opts.overwrite;
+	struct perf_evlist *evlist = top->evlist;
 	int i;
 
+	if (overwrite)
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
 	for (i = 0; i < top->evlist->nr_mmaps; i++)
 		perf_top__mmap_read_idx(top, i);
+
+	if (overwrite) {
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+	}
 }
 
 /*
@@ -979,11 +999,6 @@ static int perf_top__start_counters(struct perf_top *top)
 		goto out_err;
 	}
 
-	if (opts->overwrite) {
-		ui__error("not support overwrite mode yet\n");
-		goto out_err;
-	}
-
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, counter) {
@@ -1144,7 +1159,7 @@ static int __cmd_top(struct perf_top *top)
 
 		perf_top__mmap_read(top);
 
-		if (hits == top->samples)
+		if (opts->overwrite || (hits == top->samples))
 			ret = perf_evlist__poll(top->evlist, 100);
 
 		if (resize) {
@@ -1238,6 +1253,7 @@ int cmd_top(int argc, const char **argv)
 				.uses_mmap   = true,
 			},
 			.proc_map_timeout    = 500,
+			.overwrite	= 1,
 		},
 		.max_stack	     = sysctl_perf_event_max_stack,
 		.sym_pcnt_filter     = 5,
-- 
cgit v1.2.3


From 8cc42de736b617827a4e7664fb8d7a325bc125bc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:32 -0800
Subject: perf top: Check the latency of perf_top__mmap_read()

The latency of perf_top__mmap_read() should be lower than refresh time.
If not, give some hints to reduce the latency.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-18-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2b4914f34ed6..b7c823ba8374 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -889,8 +889,10 @@ static void perf_top__mmap_read(struct perf_top *top)
 {
 	bool overwrite = top->record_opts.overwrite;
 	struct perf_evlist *evlist = top->evlist;
+	unsigned long long start, end;
 	int i;
 
+	start = rdclock();
 	if (overwrite)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
@@ -901,6 +903,13 @@ static void perf_top__mmap_read(struct perf_top *top)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
 	}
+	end = rdclock();
+
+	if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+		ui__warning("Too slow to read ring buffer.\n"
+			    "Please try increasing the period (-c) or\n"
+			    "decreasing the freq (-F) or\n"
+			    "limiting the number of CPUs (-C)\n");
 }
 
 /*
-- 
cgit v1.2.3


From 6677d26c8befa462eab9be6c5335a939011e7e65 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 29 Jan 2018 15:03:59 +0200
Subject: perf tools: Substitute yet another strtoull()

Instead of home grown function let's use what library provides us.

Signed-off-by: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20180129130359.1490-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 443892dabedb..1019bbc5dbd8 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -340,35 +340,15 @@ size_t hex_width(u64 v)
 	return n;
 }
 
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
 /*
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
 int hex2u64(const char *ptr, u64 *long_val)
 {
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
+	char *p;
 
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
+	*long_val = strtoull(ptr, &p, 16);
 
 	return p - ptr;
 }
-- 
cgit v1.2.3


From ba7e851642f48002def3450b279598c187721fd0 Mon Sep 17 00:00:00 2001
From: Sangwon Hong <qpakzk@gmail.com>
Date: Mon, 5 Feb 2018 20:48:35 +0900
Subject: perf data: Document missing --force option

Add the --force option to the man page.

Signed-off-by: Sangwon Hong <qpakzk@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/1517831315-31490-1-git-send-email-qpakzk@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-data.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index f0796a47dfa3..90bb4aabe4f8 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -30,6 +30,10 @@ OPTIONS for 'convert'
 -i::
 	Specify input perf data file path.
 
+-f::
+--force::
+	Don't complain, do it.
+
 -v::
 --verbose::
         Be more verbose (show counter open errors, etc).
-- 
cgit v1.2.3


From 7a92453620d42c3a5fea94a864dc6aa04c262b93 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Wed, 17 Jan 2018 09:38:31 +0100
Subject: perf test: Fix test trace+probe_libc_inet_pton.sh for s390x

On Intel test case trace+probe_libc_inet_pton.sh succeeds and the
output is:

[root@f27 perf]# ./perf trace --no-syscalls
                  -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.037 ms

 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.037/0.037/0.037/0.000 ms
     0.000 probe_libc:inet_pton:(7fa40ac618a0))
              __GI___inet_pton (/usr/lib64/libc-2.26.so)
              getaddrinfo (/usr/lib64/libc-2.26.so)
              main (/usr/bin/ping)

The kernel stack unwinder is used, it is specified implicitly
as call-graph=fp (frame pointer).

On s390x only dwarf is available for stack unwinding. It is also
done in user space. This requires different parameter setup
and result checking for s390x and Intel.

This patch adds separate perf trace setup and result checking
for Intel and s390x. On s390x specify this command line to
get a call-graph and handle the different call graph result
checking:

[root@s35lp76 perf]# ./perf trace --no-syscalls
	-e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.041 ms

 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.041/0.041/0.041/0.000 ms
     0.000 probe_libc:inet_pton:(3ffb9942060))
            __GI___inet_pton (/usr/lib64/libc-2.26.so)
            gaih_inet (inlined)
            __GI_getaddrinfo (inlined)
            main (/usr/bin/ping)
            __libc_start_main (/usr/lib64/libc-2.26.so)
            _start (/usr/bin/ping)
[root@s35lp76 perf]#

Before:
[root@s8360047 perf]# ./perf test -vv 58
58: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
test child forked, pid 26349
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.079 ms
 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.079/0.079/0.079/0.000 ms
0.000 probe_libc:inet_pton:(3ff925c2060))
test child finished with -1
 ---- end ----
probe libc's inet_pton & backtrace it with ping: FAILED!
[root@s8360047 perf]#

After:
[root@s35lp76 perf]# ./perf test -vv 57
57: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
test child forked, pid 38708
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.038 ms
 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.038/0.038/0.038/0.000 ms
0.000 probe_libc:inet_pton:(3ff87342060))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
gaih_inet (inlined)
__GI_getaddrinfo (inlined)
main (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
_start (/usr/bin/ping)
test child finished with 0
 ---- end ----
probe libc's inet_pton & backtrace it with ping: Ok
[root@s35lp76 perf]#

On Intel the test case runs unchanged and succeeds.

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Link: http://lkml.kernel.org/r/20180117083831.101001-1-tmricht@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../perf/tests/shell/trace+probe_libc_inet_pton.sh | 23 +++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 8b3da21a08f1..c446c894b297 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() {
 	expected[4]="rtt min.*"
 	expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
 	expected[6]=".*inet_pton[[:space:]]\($libc\)$"
-	expected[7]="getaddrinfo[[:space:]]\($libc\)$"
-	expected[8]=".*\(.*/bin/ping.*\)$"
-
-	perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
+	case "$(uname -m)" in
+	s390x)
+		eventattr='call-graph=dwarf'
+		expected[7]="gaih_inet[[:space:]]\(inlined\)$"
+		expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+		expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
+		expected[10]="__libc_start_main[[:space:]]\($libc\)$"
+		expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
+		;;
+	*)
+		eventattr='max-stack=3'
+		expected[7]="getaddrinfo[[:space:]]\($libc\)$"
+		expected[8]=".*\(.*/bin/ping.*\)$"
+		;;
+	esac
+
+	perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
 		echo $line
 		echo "$line" | egrep -q "${expected[$idx]}"
 		if [ $? -ne 0 ] ; then
@@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() {
 			exit 1
 		fi
 		let idx+=1
-		[ $idx -eq 9 ] && break
+		[ -z "${expected[$idx]}" ] && break
 	done
 }
 
-- 
cgit v1.2.3


From f091f1d6a2b4840c9b631d6138f5354401347863 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 13 Feb 2018 12:54:58 +0100
Subject: tools/headers: Synchronize kernel ABI headers, v4.16-rc1

Sync the following tooling headers with the latest kernel version:

  tools/arch/powerpc/include/uapi/asm/kvm.h
  tools/arch/x86/include/asm/cpufeatures.h
  tools/include/uapi/drm/i915_drm.h
  tools/include/uapi/linux/if_link.h
  tools/include/uapi/linux/kvm.h

All the changes are new ABI additions which don't impact their use
in existing tooling.

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 +
 tools/arch/x86/include/asm/cpufeatures.h  |  1 +
 tools/include/uapi/drm/i915_drm.h         | 77 ++++++++++++++++++++++++++
 tools/include/uapi/linux/if_link.h        |  1 +
 tools/include/uapi/linux/kvm.h            | 90 +++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..0dfe4d3f74e2 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -210,6 +210,7 @@
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index ac3c6503ca27..536ee4febd74 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -86,6 +86,62 @@ enum i915_mocs_table_index {
 	I915_MOCS_CACHED,
 };
 
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_RENDER	= 0,
+	I915_ENGINE_CLASS_COPY		= 1,
+	I915_ENGINE_CLASS_VIDEO		= 2,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
+
+	I915_ENGINE_CLASS_INVALID	= -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_BUSY = 0,
+	I915_SAMPLE_WAIT = 1,
+	I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
@@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen support contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8616131e2c61..6d9447700e18 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -163,6 +163,7 @@ enum {
 	IFLA_IF_NETNSID,
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
+	IFLA_NEW_IFINDEX,
 	__IFLA_MAX
 };
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8fb90a0819c3..0fb5ef939732 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+	__u64 addr;
+	__u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+	/* Guest initialization commands */
+	KVM_SEV_INIT = 0,
+	KVM_SEV_ES_INIT,
+	/* Guest launch commands */
+	KVM_SEV_LAUNCH_START,
+	KVM_SEV_LAUNCH_UPDATE_DATA,
+	KVM_SEV_LAUNCH_UPDATE_VMSA,
+	KVM_SEV_LAUNCH_SECRET,
+	KVM_SEV_LAUNCH_MEASURE,
+	KVM_SEV_LAUNCH_FINISH,
+	/* Guest migration commands (outgoing) */
+	KVM_SEV_SEND_START,
+	KVM_SEV_SEND_UPDATE_DATA,
+	KVM_SEV_SEND_UPDATE_VMSA,
+	KVM_SEV_SEND_FINISH,
+	/* Guest migration commands (incoming) */
+	KVM_SEV_RECEIVE_START,
+	KVM_SEV_RECEIVE_UPDATE_DATA,
+	KVM_SEV_RECEIVE_UPDATE_VMSA,
+	KVM_SEV_RECEIVE_FINISH,
+	/* Guest status and debug commands */
+	KVM_SEV_GUEST_STATUS,
+	KVM_SEV_DBG_DECRYPT,
+	KVM_SEV_DBG_ENCRYPT,
+	/* Guest certificates commands */
+	KVM_SEV_CERT_EXPORT,
+
+	KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+	__u32 id;
+	__u64 data;
+	__u32 error;
+	__u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+	__u32 handle;
+	__u32 policy;
+	__u64 dh_uaddr;
+	__u32 dh_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+	__u64 uaddr;
+	__u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+	__u64 uaddr;
+	__u32 len;
+};
+
+struct kvm_sev_guest_status {
+	__u32 handle;
+	__u32 policy;
+	__u32 state;
+};
+
+struct kvm_sev_dbg {
+	__u64 src_uaddr;
+	__u64 dst_uaddr;
+	__u32 len;
+};
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit v1.2.3


From baa676103037e0dd145bb905eb51bc0b2f48fd49 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:49 +0100
Subject: perf s390: Grab a copy of arch/s390/kernel/syscall/syscall.tbl

Grab a copy of the s390 system call table file introduced with commit
857f46bfb07f53dc112d69bdfb137cc5ec3da7c5 "s390/syscalls: add system call
table".

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-3-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-hpw7vdjp7g92ivgpddrp5ydq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 390 ++++++++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 tools/perf/arch/s390/entry/syscalls/syscall.tbl

(limited to 'tools')

diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..b38d48464368
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1    common	exit			sys_exit			sys_exit
+2    common	fork			sys_fork			sys_fork
+3    common	read			sys_read			compat_sys_s390_read
+4    common	write			sys_write			compat_sys_s390_write
+5    common	open			sys_open			compat_sys_open
+6    common	close			sys_close			sys_close
+7    common	restart_syscall		sys_restart_syscall		sys_restart_syscall
+8    common	creat			sys_creat			compat_sys_creat
+9    common	link			sys_link			compat_sys_link
+10   common	unlink			sys_unlink			compat_sys_unlink
+11   common	execve			sys_execve			compat_sys_execve
+12   common	chdir			sys_chdir			compat_sys_chdir
+13   32		time			-				compat_sys_time
+14   common	mknod			sys_mknod			compat_sys_mknod
+15   common	chmod			sys_chmod			compat_sys_chmod
+16   32		lchown			-				compat_sys_s390_lchown16
+19   common	lseek			sys_lseek			compat_sys_lseek
+20   common	getpid			sys_getpid			sys_getpid
+21   common	mount			sys_mount			compat_sys_mount
+22   common	umount			sys_oldumount			compat_sys_oldumount
+23   32		setuid			-				compat_sys_s390_setuid16
+24   32		getuid			-				compat_sys_s390_getuid16
+25   32		stime			-				compat_sys_stime
+26   common	ptrace			sys_ptrace			compat_sys_ptrace
+27   common	alarm			sys_alarm			sys_alarm
+29   common	pause			sys_pause			sys_pause
+30   common	utime			sys_utime			compat_sys_utime
+33   common	access			sys_access			compat_sys_access
+34   common	nice			sys_nice			sys_nice
+36   common	sync			sys_sync			sys_sync
+37   common	kill			sys_kill			sys_kill
+38   common	rename			sys_rename			compat_sys_rename
+39   common	mkdir			sys_mkdir			compat_sys_mkdir
+40   common	rmdir			sys_rmdir			compat_sys_rmdir
+41   common	dup			sys_dup				sys_dup
+42   common	pipe			sys_pipe			compat_sys_pipe
+43   common	times			sys_times			compat_sys_times
+45   common	brk			sys_brk				compat_sys_brk
+46   32		setgid			-				compat_sys_s390_setgid16
+47   32		getgid			-				compat_sys_s390_getgid16
+48   common	signal			sys_signal			compat_sys_signal
+49   32		geteuid			-				compat_sys_s390_geteuid16
+50   32		getegid			-				compat_sys_s390_getegid16
+51   common	acct			sys_acct			compat_sys_acct
+52   common	umount2			sys_umount			compat_sys_umount
+54   common	ioctl			sys_ioctl			compat_sys_ioctl
+55   common	fcntl			sys_fcntl			compat_sys_fcntl
+57   common	setpgid			sys_setpgid			sys_setpgid
+60   common	umask			sys_umask			sys_umask
+61   common	chroot			sys_chroot			compat_sys_chroot
+62   common	ustat			sys_ustat			compat_sys_ustat
+63   common	dup2			sys_dup2			sys_dup2
+64   common	getppid			sys_getppid			sys_getppid
+65   common	getpgrp			sys_getpgrp			sys_getpgrp
+66   common	setsid			sys_setsid			sys_setsid
+67   common	sigaction		sys_sigaction			compat_sys_sigaction
+70   32		setreuid		-				compat_sys_s390_setreuid16
+71   32		setregid		-				compat_sys_s390_setregid16
+72   common	sigsuspend		sys_sigsuspend			compat_sys_sigsuspend
+73   common	sigpending		sys_sigpending			compat_sys_sigpending
+74   common	sethostname		sys_sethostname			compat_sys_sethostname
+75   common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+76   32		getrlimit		-				compat_sys_old_getrlimit
+77   common	getrusage		sys_getrusage			compat_sys_getrusage
+78   common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+79   common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+80   32		getgroups		-				compat_sys_s390_getgroups16
+81   32		setgroups		-				compat_sys_s390_setgroups16
+83   common	symlink			sys_symlink			compat_sys_symlink
+85   common	readlink		sys_readlink			compat_sys_readlink
+86   common	uselib			sys_uselib			compat_sys_uselib
+87   common	swapon			sys_swapon			compat_sys_swapon
+88   common	reboot			sys_reboot			compat_sys_reboot
+89   common	readdir			-				compat_sys_old_readdir
+90   common	mmap			sys_old_mmap			compat_sys_s390_old_mmap
+91   common	munmap			sys_munmap			compat_sys_munmap
+92   common	truncate		sys_truncate			compat_sys_truncate
+93   common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+94   common	fchmod			sys_fchmod			sys_fchmod
+95   32		fchown			-				compat_sys_s390_fchown16
+96   common	getpriority		sys_getpriority			sys_getpriority
+97   common	setpriority		sys_setpriority			sys_setpriority
+99   common	statfs			sys_statfs			compat_sys_statfs
+100  common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+101  32		ioperm			-				-
+102  common	socketcall		sys_socketcall			compat_sys_socketcall
+103  common	syslog			sys_syslog			compat_sys_syslog
+104  common	setitimer		sys_setitimer			compat_sys_setitimer
+105  common	getitimer		sys_getitimer			compat_sys_getitimer
+106  common	stat			sys_newstat			compat_sys_newstat
+107  common	lstat			sys_newlstat			compat_sys_newlstat
+108  common	fstat			sys_newfstat			compat_sys_newfstat
+110  common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
+111  common	vhangup			sys_vhangup			sys_vhangup
+112  common	idle			-				-
+114  common	wait4			sys_wait4			compat_sys_wait4
+115  common	swapoff			sys_swapoff			compat_sys_swapoff
+116  common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+117  common	ipc			sys_s390_ipc			compat_sys_s390_ipc
+118  common	fsync			sys_fsync			sys_fsync
+119  common	sigreturn		sys_sigreturn			compat_sys_sigreturn
+120  common	clone			sys_clone			compat_sys_clone
+121  common	setdomainname		sys_setdomainname		compat_sys_setdomainname
+122  common	uname			sys_newuname			compat_sys_newuname
+124  common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+125  common	mprotect		sys_mprotect			compat_sys_mprotect
+126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+127  common	create_module		-				-
+128  common	init_module		sys_init_module			compat_sys_init_module
+129  common	delete_module		sys_delete_module		compat_sys_delete_module
+130  common	get_kernel_syms		-				-
+131  common	quotactl		sys_quotactl			compat_sys_quotactl
+132  common	getpgid			sys_getpgid			sys_getpgid
+133  common	fchdir			sys_fchdir			sys_fchdir
+134  common	bdflush			sys_bdflush			compat_sys_bdflush
+135  common	sysfs			sys_sysfs			compat_sys_sysfs
+136  common	personality		sys_s390_personality		sys_s390_personality
+137  common	afs_syscall		-				-
+138  32		setfsuid		-				compat_sys_s390_setfsuid16
+139  32		setfsgid		-				compat_sys_s390_setfsgid16
+140  32		_llseek			-				compat_sys_llseek
+141  common	getdents		sys_getdents			compat_sys_getdents
+142  32		_newselect		-				compat_sys_select
+142  64		select			sys_select			-
+143  common	flock			sys_flock			sys_flock
+144  common	msync			sys_msync			compat_sys_msync
+145  common	readv			sys_readv			compat_sys_readv
+146  common	writev			sys_writev			compat_sys_writev
+147  common	getsid			sys_getsid			sys_getsid
+148  common	fdatasync		sys_fdatasync			sys_fdatasync
+149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+150  common	mlock			sys_mlock			compat_sys_mlock
+151  common	munlock			sys_munlock			compat_sys_munlock
+152  common	mlockall		sys_mlockall			sys_mlockall
+153  common	munlockall		sys_munlockall			sys_munlockall
+154  common	sched_setparam		sys_sched_setparam		compat_sys_sched_setparam
+155  common	sched_getparam		sys_sched_getparam		compat_sys_sched_getparam
+156  common	sched_setscheduler	sys_sched_setscheduler		compat_sys_sched_setscheduler
+157  common	sched_getscheduler	sys_sched_getscheduler		sys_sched_getscheduler
+158  common	sched_yield		sys_sched_yield			sys_sched_yield
+159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
+160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
+162  common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+163  common	mremap			sys_mremap			compat_sys_mremap
+164  32		setresuid		-				compat_sys_s390_setresuid16
+165  32		getresuid		-				compat_sys_s390_getresuid16
+167  common	query_module		-				-
+168  common	poll			sys_poll			compat_sys_poll
+169  common	nfsservctl		-				-
+170  32		setresgid		-				compat_sys_s390_setresgid16
+171  32		getresgid		-				compat_sys_s390_getresgid16
+172  common	prctl			sys_prctl			compat_sys_prctl
+173  common	rt_sigreturn		sys_rt_sigreturn		compat_sys_rt_sigreturn
+174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+180  common	pread64			sys_pread64			compat_sys_s390_pread64
+181  common	pwrite64		sys_pwrite64			compat_sys_s390_pwrite64
+182  32		chown			-				compat_sys_s390_chown16
+183  common	getcwd			sys_getcwd			compat_sys_getcwd
+184  common	capget			sys_capget			compat_sys_capget
+185  common	capset			sys_capset			compat_sys_capset
+186  common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+187  common	sendfile		sys_sendfile64			compat_sys_sendfile
+188  common	getpmsg			-				-
+189  common	putpmsg			-				-
+190  common	vfork			sys_vfork			sys_vfork
+191  32		ugetrlimit		-				compat_sys_getrlimit
+191  64		getrlimit		sys_getrlimit			-
+192  32		mmap2			-				compat_sys_s390_mmap2
+193  32		truncate64		-				compat_sys_s390_truncate64
+194  32		ftruncate64		-				compat_sys_s390_ftruncate64
+195  32		stat64			-				compat_sys_s390_stat64
+196  32		lstat64			-				compat_sys_s390_lstat64
+197  32		fstat64			-				compat_sys_s390_fstat64
+198  32		lchown32		-				compat_sys_lchown
+198  64		lchown			sys_lchown			-
+199  32		getuid32		-				sys_getuid
+199  64		getuid			sys_getuid			-
+200  32		getgid32		-				sys_getgid
+200  64		getgid			sys_getgid			-
+201  32		geteuid32		-				sys_geteuid
+201  64		geteuid			sys_geteuid			-
+202  32		getegid32		-				sys_getegid
+202  64		getegid			sys_getegid			-
+203  32		setreuid32		-				sys_setreuid
+203  64		setreuid		sys_setreuid			-
+204  32		setregid32		-				sys_setregid
+204  64		setregid		sys_setregid			-
+205  32		getgroups32		-				compat_sys_getgroups
+205  64		getgroups		sys_getgroups			-
+206  32		setgroups32		-				compat_sys_setgroups
+206  64		setgroups		sys_setgroups			-
+207  32		fchown32		-				sys_fchown
+207  64		fchown			sys_fchown			-
+208  32		setresuid32		-				sys_setresuid
+208  64		setresuid		sys_setresuid			-
+209  32		getresuid32		-				compat_sys_getresuid
+209  64		getresuid		sys_getresuid			-
+210  32		setresgid32		-				sys_setresgid
+210  64		setresgid		sys_setresgid			-
+211  32		getresgid32		-				compat_sys_getresgid
+211  64		getresgid		sys_getresgid			-
+212  32		chown32			-				compat_sys_chown
+212  64		chown			sys_chown			-
+213  32		setuid32		-				sys_setuid
+213  64		setuid			sys_setuid			-
+214  32		setgid32		-				sys_setgid
+214  64		setgid			sys_setgid			-
+215  32		setfsuid32		-				sys_setfsuid
+215  64		setfsuid		sys_setfsuid			-
+216  32		setfsgid32		-				sys_setfsgid
+216  64		setfsgid		sys_setfsgid			-
+217  common	pivot_root		sys_pivot_root			compat_sys_pivot_root
+218  common	mincore			sys_mincore			compat_sys_mincore
+219  common	madvise			sys_madvise			compat_sys_madvise
+220  common	getdents64		sys_getdents64			compat_sys_getdents64
+221  32		fcntl64			-				compat_sys_fcntl64
+222  common	readahead		sys_readahead			compat_sys_s390_readahead
+223  32		sendfile64		-				compat_sys_sendfile64
+224  common	setxattr		sys_setxattr			compat_sys_setxattr
+225  common	lsetxattr		sys_lsetxattr			compat_sys_lsetxattr
+226  common	fsetxattr		sys_fsetxattr			compat_sys_fsetxattr
+227  common	getxattr		sys_getxattr			compat_sys_getxattr
+228  common	lgetxattr		sys_lgetxattr			compat_sys_lgetxattr
+229  common	fgetxattr		sys_fgetxattr			compat_sys_fgetxattr
+230  common	listxattr		sys_listxattr			compat_sys_listxattr
+231  common	llistxattr		sys_llistxattr			compat_sys_llistxattr
+232  common	flistxattr		sys_flistxattr			compat_sys_flistxattr
+233  common	removexattr		sys_removexattr			compat_sys_removexattr
+234  common	lremovexattr		sys_lremovexattr		compat_sys_lremovexattr
+235  common	fremovexattr		sys_fremovexattr		compat_sys_fremovexattr
+236  common	gettid			sys_gettid			sys_gettid
+237  common	tkill			sys_tkill			sys_tkill
+238  common	futex			sys_futex			compat_sys_futex
+239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+241  common	tgkill			sys_tgkill			sys_tgkill
+243  common	io_setup		sys_io_setup			compat_sys_io_setup
+244  common	io_destroy		sys_io_destroy			compat_sys_io_destroy
+245  common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+246  common	io_submit		sys_io_submit			compat_sys_io_submit
+247  common	io_cancel		sys_io_cancel			compat_sys_io_cancel
+248  common	exit_group		sys_exit_group			sys_exit_group
+249  common	epoll_create		sys_epoll_create		sys_epoll_create
+250  common	epoll_ctl		sys_epoll_ctl			compat_sys_epoll_ctl
+251  common	epoll_wait		sys_epoll_wait			compat_sys_epoll_wait
+252  common	set_tid_address		sys_set_tid_address		compat_sys_set_tid_address
+253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
+254  common	timer_create		sys_timer_create		compat_sys_timer_create
+255  common	timer_settime		sys_timer_settime		compat_sys_timer_settime
+256  common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
+258  common	timer_delete		sys_timer_delete		sys_timer_delete
+259  common	clock_settime		sys_clock_settime		compat_sys_clock_settime
+260  common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
+261  common	clock_getres		sys_clock_getres		compat_sys_clock_getres
+262  common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
+265  common	statfs64		sys_statfs64			compat_sys_statfs64
+266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+267  common	remap_file_pages	sys_remap_file_pages		compat_sys_remap_file_pages
+268  common	mbind			sys_mbind			compat_sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+271  common	mq_open			sys_mq_open			compat_sys_mq_open
+272  common	mq_unlink		sys_mq_unlink			compat_sys_mq_unlink
+273  common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
+274  common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+278  common	add_key			sys_add_key			compat_sys_add_key
+279  common	request_key		sys_request_key			compat_sys_request_key
+280  common	keyctl			sys_keyctl			compat_sys_keyctl
+281  common	waitid			sys_waitid			compat_sys_waitid
+282  common	ioprio_set		sys_ioprio_set			sys_ioprio_set
+283  common	ioprio_get		sys_ioprio_get			sys_ioprio_get
+284  common	inotify_init		sys_inotify_init		sys_inotify_init
+285  common	inotify_add_watch	sys_inotify_add_watch		compat_sys_inotify_add_watch
+286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
+287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+288  common	openat			sys_openat			compat_sys_openat
+289  common	mkdirat			sys_mkdirat			compat_sys_mkdirat
+290  common	mknodat			sys_mknodat			compat_sys_mknodat
+291  common	fchownat		sys_fchownat			compat_sys_fchownat
+292  common	futimesat		sys_futimesat			compat_sys_futimesat
+293  32		fstatat64		-				compat_sys_s390_fstatat64
+293  64		newfstatat		sys_newfstatat			-
+294  common	unlinkat		sys_unlinkat			compat_sys_unlinkat
+295  common	renameat		sys_renameat			compat_sys_renameat
+296  common	linkat			sys_linkat			compat_sys_linkat
+297  common	symlinkat		sys_symlinkat			compat_sys_symlinkat
+298  common	readlinkat		sys_readlinkat			compat_sys_readlinkat
+299  common	fchmodat		sys_fchmodat			compat_sys_fchmodat
+300  common	faccessat		sys_faccessat			compat_sys_faccessat
+301  common	pselect6		sys_pselect6			compat_sys_pselect6
+302  common	ppoll			sys_ppoll			compat_sys_ppoll
+303  common	unshare			sys_unshare			compat_sys_unshare
+304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+306  common	splice			sys_splice			compat_sys_splice
+307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
+308  common	tee			sys_tee				compat_sys_tee
+309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+310  common	move_pages		sys_move_pages			compat_sys_move_pages
+311  common	getcpu			sys_getcpu			compat_sys_getcpu
+312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+313  common	utimes			sys_utimes			compat_sys_utimes
+314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
+315  common	utimensat		sys_utimensat			compat_sys_utimensat
+316  common	signalfd		sys_signalfd			compat_sys_signalfd
+317  common	timerfd			-				-
+318  common	eventfd			sys_eventfd			sys_eventfd
+319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
+320  common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
+321  common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
+323  common	eventfd2		sys_eventfd2			sys_eventfd2
+324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
+325  common	pipe2			sys_pipe2			compat_sys_pipe2
+326  common	dup3			sys_dup3			sys_dup3
+327  common	epoll_create1		sys_epoll_create1		sys_epoll_create1
+328  common	preadv			sys_preadv			compat_sys_preadv
+329  common	pwritev			sys_pwritev			compat_sys_pwritev
+330  common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+331  common	perf_event_open		sys_perf_event_open		compat_sys_perf_event_open
+332  common	fanotify_init		sys_fanotify_init		sys_fanotify_init
+333  common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+334  common	prlimit64		sys_prlimit64			compat_sys_prlimit64
+335  common	name_to_handle_at	sys_name_to_handle_at		compat_sys_name_to_handle_at
+336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+338  common	syncfs			sys_syncfs			sys_syncfs
+339  common	setns			sys_setns			sys_setns
+340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
+343  common	kcmp			sys_kcmp			compat_sys_kcmp
+344  common	finit_module		sys_finit_module		compat_sys_finit_module
+345  common	sched_setattr		sys_sched_setattr		compat_sys_sched_setattr
+346  common	sched_getattr		sys_sched_getattr		compat_sys_sched_getattr
+347  common	renameat2		sys_renameat2			compat_sys_renameat2
+348  common	seccomp			sys_seccomp			compat_sys_seccomp
+349  common	getrandom		sys_getrandom			compat_sys_getrandom
+350  common	memfd_create		sys_memfd_create		compat_sys_memfd_create
+351  common	bpf			sys_bpf				compat_sys_bpf
+352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
+353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
+354  common	execveat		sys_execveat			compat_sys_execveat
+355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
+356  common	membarrier		sys_membarrier			sys_membarrier
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+359  common	socket			sys_socket			sys_socket
+360  common	socketpair		sys_socketpair			compat_sys_socketpair
+361  common	bind			sys_bind			compat_sys_bind
+362  common	connect			sys_connect			compat_sys_connect
+363  common	listen			sys_listen			sys_listen
+364  common	accept4			sys_accept4			compat_sys_accept4
+365  common	getsockopt		sys_getsockopt			compat_sys_getsockopt
+366  common	setsockopt		sys_setsockopt			compat_sys_setsockopt
+367  common	getsockname		sys_getsockname			compat_sys_getsockname
+368  common	getpeername		sys_getpeername			compat_sys_getpeername
+369  common	sendto			sys_sendto			compat_sys_sendto
+370  common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+371  common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+372  common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+373  common	shutdown		sys_shutdown			sys_shutdown
+374  common	mlock2			sys_mlock2			compat_sys_mlock2
+375  common	copy_file_range		sys_copy_file_range		compat_sys_copy_file_range
+376  common	preadv2			sys_preadv2			compat_sys_preadv2
+377  common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+378  common	s390_guarded_storage	sys_s390_guarded_storage	compat_sys_s390_guarded_storage
+379  common	statx			sys_statx			compat_sys_statx
+380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
-- 
cgit v1.2.3


From 690d22d9d4423b4522fb44a71145403eef2df834 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:50 +0100
Subject: perf s390: Rework system call table creation by using syscall.tbl

Recently, s390 uses a syscall.tbl input file to generate its system call
table and unistd uapi header files.  Hence, update mksyscalltbl to use
it as input to create the system table for perf.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-4-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-bdyhllhsq1zgxv2qx4m377y6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/Makefile                    | 10 +++++++---
 tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 18 +++++++-----------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 48228de415d0..dfa6e3103437 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 
 out    := $(OUTPUT)arch/s390/include/generated/asm
 header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B $(sysdef) $(syskrn) >/dev/null) \
+        || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
 
 clean::
 	$(call QUIET_CLEAN, s390) $(RM) $(header)
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
index 7fa0d0abd419..72ecbb676370 100755
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -3,25 +3,23 @@
 #
 # Generate system call table for perf
 #
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
 # Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 #
 
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
 
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
 	echo "Could not read input file" >&2
 	exit 1
 fi
 
 create_table()
 {
-	local max_nr
+	local max_nr nr abi sc discard
 
 	echo 'static const char *syscalltbl_s390_64[] = {'
-	while read sc nr; do
+	while read nr abi sc discard; do
 		printf '\t[%d] = "%s",\n' $nr $sc
 		max_nr=$nr
 	done
@@ -29,8 +27,6 @@ create_table()
 	echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
 }
 
-
-$gcc -m64 -E -dM -x c  $input	       \
-	|sed -ne 's/^#define __NR_//p' \
-	|sort -t' ' -k2 -nu	       \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL	\
+	|sort -k1 -n					\
 	|create_table
-- 
cgit v1.2.3


From f1d0b4cde922863004ce3f5f39e8662cc0686c96 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:48 +0100
Subject: Revert "tools include s390: Grab a copy of
 arch/s390/include/uapi/asm/unistd.h"

This reverts commit f120c7b187e6c418238710b48723ce141f467543 which is no
longer required with the introduction of a syscall.tbl on s390.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-2-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-q1lg0nvhha1tk39ri9aqalcb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/s390/include/uapi/asm/unistd.h | 412 ------------------------------
 tools/perf/check-headers.sh               |   1 -
 2 files changed, 413 deletions(-)
 delete mode 100644 tools/arch/s390/include/uapi/asm/unistd.h

(limited to 'tools')

diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h
deleted file mode 100644
index 725120939051..000000000000
--- a/tools/arch/s390/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/unistd.h"
- */
-
-#ifndef _UAPI_ASM_S390_UNISTD_H_
-#define _UAPI_ASM_S390_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit                 1
-#define __NR_fork                 2
-#define __NR_read                 3
-#define __NR_write                4
-#define __NR_open                 5
-#define __NR_close                6
-#define __NR_restart_syscall	  7
-#define __NR_creat                8
-#define __NR_link                 9
-#define __NR_unlink              10
-#define __NR_execve              11
-#define __NR_chdir               12
-#define __NR_mknod               14
-#define __NR_chmod               15
-#define __NR_lseek               19
-#define __NR_getpid              20
-#define __NR_mount               21
-#define __NR_umount              22
-#define __NR_ptrace              26
-#define __NR_alarm               27
-#define __NR_pause               29
-#define __NR_utime               30
-#define __NR_access              33
-#define __NR_nice                34
-#define __NR_sync                36
-#define __NR_kill                37
-#define __NR_rename              38
-#define __NR_mkdir               39
-#define __NR_rmdir               40
-#define __NR_dup                 41
-#define __NR_pipe                42
-#define __NR_times               43
-#define __NR_brk                 45
-#define __NR_signal              48
-#define __NR_acct                51
-#define __NR_umount2             52
-#define __NR_ioctl               54
-#define __NR_fcntl               55
-#define __NR_setpgid             57
-#define __NR_umask               60
-#define __NR_chroot              61
-#define __NR_ustat               62
-#define __NR_dup2                63
-#define __NR_getppid             64
-#define __NR_getpgrp             65
-#define __NR_setsid              66
-#define __NR_sigaction           67
-#define __NR_sigsuspend          72
-#define __NR_sigpending          73
-#define __NR_sethostname         74
-#define __NR_setrlimit           75
-#define __NR_getrusage           77
-#define __NR_gettimeofday        78
-#define __NR_settimeofday        79
-#define __NR_symlink             83
-#define __NR_readlink            85
-#define __NR_uselib              86
-#define __NR_swapon              87
-#define __NR_reboot              88
-#define __NR_readdir             89
-#define __NR_mmap                90
-#define __NR_munmap              91
-#define __NR_truncate            92
-#define __NR_ftruncate           93
-#define __NR_fchmod              94
-#define __NR_getpriority         96
-#define __NR_setpriority         97
-#define __NR_statfs              99
-#define __NR_fstatfs            100
-#define __NR_socketcall         102
-#define __NR_syslog             103
-#define __NR_setitimer          104
-#define __NR_getitimer          105
-#define __NR_stat               106
-#define __NR_lstat              107
-#define __NR_fstat              108
-#define __NR_lookup_dcookie     110
-#define __NR_vhangup            111
-#define __NR_idle               112
-#define __NR_wait4              114
-#define __NR_swapoff            115
-#define __NR_sysinfo            116
-#define __NR_ipc                117
-#define __NR_fsync              118
-#define __NR_sigreturn          119
-#define __NR_clone              120
-#define __NR_setdomainname      121
-#define __NR_uname              122
-#define __NR_adjtimex           124
-#define __NR_mprotect           125
-#define __NR_sigprocmask        126
-#define __NR_create_module      127
-#define __NR_init_module        128
-#define __NR_delete_module      129
-#define __NR_get_kernel_syms    130
-#define __NR_quotactl           131
-#define __NR_getpgid            132
-#define __NR_fchdir             133
-#define __NR_bdflush            134
-#define __NR_sysfs              135
-#define __NR_personality        136
-#define __NR_afs_syscall        137 /* Syscall for Andrew File System */
-#define __NR_getdents           141
-#define __NR_flock              143
-#define __NR_msync              144
-#define __NR_readv              145
-#define __NR_writev             146
-#define __NR_getsid             147
-#define __NR_fdatasync          148
-#define __NR__sysctl            149
-#define __NR_mlock              150
-#define __NR_munlock            151
-#define __NR_mlockall           152
-#define __NR_munlockall         153
-#define __NR_sched_setparam             154
-#define __NR_sched_getparam             155
-#define __NR_sched_setscheduler         156
-#define __NR_sched_getscheduler         157
-#define __NR_sched_yield                158
-#define __NR_sched_get_priority_max     159
-#define __NR_sched_get_priority_min     160
-#define __NR_sched_rr_get_interval      161
-#define __NR_nanosleep          162
-#define __NR_mremap             163
-#define __NR_query_module       167
-#define __NR_poll               168
-#define __NR_nfsservctl         169
-#define __NR_prctl              172
-#define __NR_rt_sigreturn       173
-#define __NR_rt_sigaction       174
-#define __NR_rt_sigprocmask     175
-#define __NR_rt_sigpending      176
-#define __NR_rt_sigtimedwait    177
-#define __NR_rt_sigqueueinfo    178
-#define __NR_rt_sigsuspend      179
-#define __NR_pread64            180
-#define __NR_pwrite64           181
-#define __NR_getcwd             183
-#define __NR_capget             184
-#define __NR_capset             185
-#define __NR_sigaltstack        186
-#define __NR_sendfile           187
-#define __NR_getpmsg		188
-#define __NR_putpmsg		189
-#define __NR_vfork		190
-#define __NR_pivot_root         217
-#define __NR_mincore            218
-#define __NR_madvise            219
-#define __NR_getdents64		220
-#define __NR_readahead		222
-#define __NR_setxattr		224
-#define __NR_lsetxattr		225
-#define __NR_fsetxattr		226
-#define __NR_getxattr		227
-#define __NR_lgetxattr		228
-#define __NR_fgetxattr		229
-#define __NR_listxattr		230
-#define __NR_llistxattr		231
-#define __NR_flistxattr		232
-#define __NR_removexattr	233
-#define __NR_lremovexattr	234
-#define __NR_fremovexattr	235
-#define __NR_gettid		236
-#define __NR_tkill		237
-#define __NR_futex		238
-#define __NR_sched_setaffinity	239
-#define __NR_sched_getaffinity	240
-#define __NR_tgkill		241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup		243
-#define __NR_io_destroy		244
-#define __NR_io_getevents	245
-#define __NR_io_submit		246
-#define __NR_io_cancel		247
-#define __NR_exit_group		248
-#define __NR_epoll_create	249
-#define __NR_epoll_ctl		250
-#define __NR_epoll_wait		251
-#define __NR_set_tid_address	252
-#define __NR_fadvise64		253
-#define __NR_timer_create	254
-#define __NR_timer_settime	255
-#define __NR_timer_gettime	256
-#define __NR_timer_getoverrun	257
-#define __NR_timer_delete	258
-#define __NR_clock_settime	259
-#define __NR_clock_gettime	260
-#define __NR_clock_getres	261
-#define __NR_clock_nanosleep	262
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64		265
-#define __NR_fstatfs64		266
-#define __NR_remap_file_pages	267
-#define __NR_mbind		268
-#define __NR_get_mempolicy	269
-#define __NR_set_mempolicy	270
-#define __NR_mq_open		271
-#define __NR_mq_unlink		272
-#define __NR_mq_timedsend	273
-#define __NR_mq_timedreceive	274
-#define __NR_mq_notify		275
-#define __NR_mq_getsetattr	276
-#define __NR_kexec_load		277
-#define __NR_add_key		278
-#define __NR_request_key	279
-#define __NR_keyctl		280
-#define __NR_waitid		281
-#define __NR_ioprio_set		282
-#define __NR_ioprio_get		283
-#define __NR_inotify_init	284
-#define __NR_inotify_add_watch	285
-#define __NR_inotify_rm_watch	286
-#define __NR_migrate_pages	287
-#define __NR_openat		288
-#define __NR_mkdirat		289
-#define __NR_mknodat		290
-#define __NR_fchownat		291
-#define __NR_futimesat		292
-#define __NR_unlinkat		294
-#define __NR_renameat		295
-#define __NR_linkat		296
-#define __NR_symlinkat		297
-#define __NR_readlinkat		298
-#define __NR_fchmodat		299
-#define __NR_faccessat		300
-#define __NR_pselect6		301
-#define __NR_ppoll		302
-#define __NR_unshare		303
-#define __NR_set_robust_list	304
-#define __NR_get_robust_list	305
-#define __NR_splice		306
-#define __NR_sync_file_range	307
-#define __NR_tee		308
-#define __NR_vmsplice		309
-#define __NR_move_pages		310
-#define __NR_getcpu		311
-#define __NR_epoll_pwait	312
-#define __NR_utimes		313
-#define __NR_fallocate		314
-#define __NR_utimensat		315
-#define __NR_signalfd		316
-#define __NR_timerfd		317
-#define __NR_eventfd		318
-#define __NR_timerfd_create	319
-#define __NR_timerfd_settime	320
-#define __NR_timerfd_gettime	321
-#define __NR_signalfd4		322
-#define __NR_eventfd2		323
-#define __NR_inotify_init1	324
-#define __NR_pipe2		325
-#define __NR_dup3		326
-#define __NR_epoll_create1	327
-#define	__NR_preadv		328
-#define	__NR_pwritev		329
-#define __NR_rt_tgsigqueueinfo	330
-#define __NR_perf_event_open	331
-#define __NR_fanotify_init	332
-#define __NR_fanotify_mark	333
-#define __NR_prlimit64		334
-#define __NR_name_to_handle_at	335
-#define __NR_open_by_handle_at	336
-#define __NR_clock_adjtime	337
-#define __NR_syncfs		338
-#define __NR_setns		339
-#define __NR_process_vm_readv	340
-#define __NR_process_vm_writev	341
-#define __NR_s390_runtime_instr 342
-#define __NR_kcmp		343
-#define __NR_finit_module	344
-#define __NR_sched_setattr	345
-#define __NR_sched_getattr	346
-#define __NR_renameat2		347
-#define __NR_seccomp		348
-#define __NR_getrandom		349
-#define __NR_memfd_create	350
-#define __NR_bpf		351
-#define __NR_s390_pci_mmio_write	352
-#define __NR_s390_pci_mmio_read		353
-#define __NR_execveat		354
-#define __NR_userfaultfd	355
-#define __NR_membarrier		356
-#define __NR_recvmmsg		357
-#define __NR_sendmmsg		358
-#define __NR_socket		359
-#define __NR_socketpair		360
-#define __NR_bind		361
-#define __NR_connect		362
-#define __NR_listen		363
-#define __NR_accept4		364
-#define __NR_getsockopt		365
-#define __NR_setsockopt		366
-#define __NR_getsockname	367
-#define __NR_getpeername	368
-#define __NR_sendto		369
-#define __NR_sendmsg		370
-#define __NR_recvfrom		371
-#define __NR_recvmsg		372
-#define __NR_shutdown		373
-#define __NR_mlock2		374
-#define __NR_copy_file_range	375
-#define __NR_preadv2		376
-#define __NR_pwritev2		377
-#define __NR_s390_guarded_storage	378
-#define __NR_statx		379
-#define __NR_s390_sthyi		380
-#define NR_syscalls 381
-
-/* 
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time		 13
-#define __NR_lchown		 16
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_getrlimit		 76
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_fchown		 95
-#define __NR_ioperm		101
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR__newselect 	142
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_chown		182
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_fcntl64		221
-#define __NR_sendfile64		223
-#define __NR_fadvise64_64	264
-#define __NR_fstatat64		293
-
-#else
-
-#define __NR_select		142
-#define __NR_getrlimit		191	/* SuS compliant getrlimit */
-#define __NR_lchown  		198
-#define __NR_getuid  		199
-#define __NR_getgid  		200
-#define __NR_geteuid  		201
-#define __NR_getegid  		202
-#define __NR_setreuid  		203
-#define __NR_setregid  		204
-#define __NR_getgroups  	205
-#define __NR_setgroups  	206
-#define __NR_fchown  		207
-#define __NR_setresuid  	208
-#define __NR_getresuid  	209
-#define __NR_setresgid  	210
-#define __NR_getresgid  	211
-#define __NR_chown  		212
-#define __NR_setuid  		213
-#define __NR_setgid  		214
-#define __NR_setfsuid  		215
-#define __NR_setfsgid  		216
-#define __NR_newfstatat		293
-
-#endif
-
-#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 51abdb0a4047..790ec25919a0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h
 arch/s390/include/uapi/asm/kvm_perf.h
 arch/s390/include/uapi/asm/ptrace.h
 arch/s390/include/uapi/asm/sie.h
-arch/s390/include/uapi/asm/unistd.h
 arch/arm/include/uapi/asm/kvm.h
 arch/arm64/include/uapi/asm/kvm.h
 arch/alpha/include/uapi/asm/errno.h
-- 
cgit v1.2.3