From 39b759cad5da88ee8ac55e186a00da4692b44e17 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 14 Nov 2017 13:15:33 -0700 Subject: tools/hv: add install target to Makefile Makefiles usually come with 'install' target included so each distro doesn't need to implement the procedure from scratch. Add it to tools/hv. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/Makefile | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 31503819454d..1139d71fa0cf 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -7,9 +7,30 @@ CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include -all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon +sbindir ?= /usr/sbin +libexecdir ?= /usr/libexec +sharedstatedir ?= /var/lib + +ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon + +ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh + +all: $(ALL_PROGRAMS) + %: %.c $(CC) $(CFLAGS) -o $@ $^ clean: $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon + +install: all + install -d -m 755 $(DESTDIR)$(sbindir); \ + install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \ + install -d -m 755 $(DESTDIR)$(sharedstatedir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program -m 755 $(DESTDIR)$(sbindir); \ + done; \ + install -m 755 lsvmbus $(DESTDIR)$(sbindir); \ + for script in $(ALL_SCRIPTS); do \ + install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \ + done -- cgit v1.2.3 From 65c79230576873b312c3599479c1e42355c9f349 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 20 Nov 2017 09:45:35 -0800 Subject: test_firmware: fix setting old custom fw path back on exit The file /sys/module/firmware_class/parameters/path can be used to set a custom firmware path. The fw_filesystem.sh script creates a temporary directory to add a test firmware file to be used during testing, in order for this to work it uses the custom path syfs file and it was supposed to reset back the file on execution exit. The script failed to do this due to a typo, it was using OLD_PATH instead of OLD_FWPATH, since its inception since v3.17. Its not as easy to just keep the old setting, it turns out that resetting an empty setting won't actually do what we want, we need to check if it was empty and set an empty space. Without this we end up having the temporary path always set after we run these tests. Fixes: 0a8adf58475 ("test: add firmware_class loader test") Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_filesystem.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index b1f20fef36c7..f9508e1a4058 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -45,7 +45,10 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + OLD_FWPATH=" " + fi + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path rm -f "$FW" rmdir "$FWPATH" } -- cgit v1.2.3 From 881c23de02fe7d64489577fcc95912fdf724d966 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 20 Nov 2017 10:23:59 -0800 Subject: test_firmware: wrap sysfs timeout test into helper This cannot run on all kernel builds. This will help us later skip this test on kernel configs where non-applicable. Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_fallback.sh | 69 +++++++++++++------------ 1 file changed, 37 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 34a42c68ebfb..e364631837d6 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -175,39 +175,44 @@ trap "test_finish" EXIT echo "ABCD0123" >"$FW" NAME=$(basename "$FW") -DEVPATH="$DIR"/"nope-$NAME"/loading - -# Test failure when doing nothing (timeout works). -echo -n 2 >/sys/class/firmware/timeout -echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & - -# Give the kernel some time to load the loading file, must be less -# than the timeout above. -sleep 1 -if [ ! -f $DEVPATH ]; then - echo "$0: fallback mechanism immediately cancelled" - echo "" - echo "The file never appeared: $DEVPATH" - echo "" - echo "This might be a distribution udev rule setup by your distribution" - echo "to immediately cancel all fallback requests, this must be" - echo "removed before running these tests. To confirm look for" - echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" - echo "and see if you have something like this:" - echo "" - echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" - echo "" - echo "If you do remove this file or comment out this line before" - echo "proceeding with these tests." - exit 1 -fi +test_syfs_timeout() +{ + DEVPATH="$DIR"/"nope-$NAME"/loading + + # Test failure when doing nothing (timeout works). + echo -n 2 >/sys/class/firmware/timeout + echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & + + # Give the kernel some time to load the loading file, must be less + # than the timeout above. + sleep 1 + if [ ! -f $DEVPATH ]; then + echo "$0: fallback mechanism immediately cancelled" + echo "" + echo "The file never appeared: $DEVPATH" + echo "" + echo "This might be a distribution udev rule setup by your distribution" + echo "to immediately cancel all fallback requests, this must be" + echo "removed before running these tests. To confirm look for" + echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" + echo "and see if you have something like this:" + echo "" + echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" + echo "" + echo "If you do remove this file or comment out this line before" + echo "proceeding with these tests." + exit 1 + fi -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: timeout works" -fi + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: timeout works" + fi +} + +test_syfs_timeout # Put timeout high enough for us to do work but not so long that failures # slow down this test too much. -- cgit v1.2.3 From 59106c815895a6b3a33f296f856137090aa2b83e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 20 Nov 2017 10:24:00 -0800 Subject: test_firmware: wrap basic sysfs fallback tests into helper These cannot run on all kernel builds. This will help us later skip this test on kernel configs where non-applicable. Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_fallback.sh | 78 +++++++++++++------------ 1 file changed, 41 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index e364631837d6..0d4527c5e8a4 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -212,37 +212,51 @@ test_syfs_timeout() fi } -test_syfs_timeout +run_sysfs_main_tests() +{ + test_syfs_timeout + # Put timeout high enough for us to do work but not so long that failures + # slow down this test too much. + echo 4 >/sys/class/firmware/timeout -# Put timeout high enough for us to do work but not so long that failures -# slow down this test too much. -echo 4 >/sys/class/firmware/timeout + # Load this script instead of the desired firmware. + load_fw "$NAME" "$0" + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: firmware comparison works" + fi -# Load this script instead of the desired firmware. -load_fw "$NAME" "$0" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: firmware comparison works" -fi + # Do a proper load, which should work correctly. + load_fw "$NAME" "$FW" + if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: fallback mechanism works" + fi -# Do a proper load, which should work correctly. -load_fw "$NAME" "$FW" -if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not loaded" >&2 - exit 1 -else - echo "$0: fallback mechanism works" -fi + load_fw_cancel "nope-$NAME" "$FW" + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was expected to be cancelled" >&2 + exit 1 + else + echo "$0: cancelling fallback mechanism works" + fi -load_fw_cancel "nope-$NAME" "$FW" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was expected to be cancelled" >&2 - exit 1 -else - echo "$0: cancelling fallback mechanism works" -fi + set +e + load_fw_fallback_with_child "nope-signal-$NAME" "$FW" + if [ "$?" -eq 0 ]; then + echo "$0: SIGCHLD on sync ignored as expected" >&2 + else + echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 + exit 1 + fi + set -e +} + +run_sysfs_main_tests if load_fw_custom "$NAME" "$FW" ; then if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then @@ -262,14 +276,4 @@ if load_fw_custom_cancel "nope-$NAME" "$FW" ; then fi fi -set +e -load_fw_fallback_with_child "nope-signal-$NAME" "$FW" -if [ "$?" -eq 0 ]; then - echo "$0: SIGCHLD on sync ignored as expected" >&2 -else - echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 - exit 1 -fi -set -e - exit 0 -- cgit v1.2.3 From 82bdf495169f2e4c25c5bd0f9384b687d358ebac Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 20 Nov 2017 10:24:01 -0800 Subject: test_firmware: wrap custom sysfs load tests into helper These can run on certain kernel configs. This will allow us later to enable these tests under the right kernel configurations. Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_fallback.sh | 43 ++++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 0d4527c5e8a4..722cad91df74 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -256,24 +256,37 @@ run_sysfs_main_tests() set -e } -run_sysfs_main_tests +run_sysfs_custom_load_tests() +{ + if load_fw_custom "$NAME" "$FW" ; then + if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi + fi -if load_fw_custom "$NAME" "$FW" ; then - if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not loaded" >&2 - exit 1 - else - echo "$0: custom fallback loading mechanism works" + if load_fw_custom "$NAME" "$FW" ; then + if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi fi -fi -if load_fw_custom_cancel "nope-$NAME" "$FW" ; then - if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was expected to be cancelled" >&2 - exit 1 - else - echo "$0: cancelling custom fallback mechanism works" + if load_fw_custom_cancel "nope-$NAME" "$FW" ; then + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was expected to be cancelled" >&2 + exit 1 + else + echo "$0: cancelling custom fallback mechanism works" + fi fi -fi +} + +run_sysfs_main_tests +run_sysfs_custom_load_tests exit 0 -- cgit v1.2.3 From 6a28b446b7d2d5455080d5b772c50b99859d6cf5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:41 -0800 Subject: selftests/bpf: adjust test_align expected output since verifier started to print liveness state of the registers adjust expected output of test_align. Now this test checks for both proper alignment handling by verifier and correctness of liveness marks. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_align.c | 156 +++++++++++++++---------------- 1 file changed, 78 insertions(+), 78 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 8591c89c0828..fe916d29e166 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -473,11 +473,11 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, /* ptr & 0x40 == either 0 or 0x40 */ - {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, + {5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, /* ptr << 2 == unknown, (4n) */ - {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + {7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ @@ -485,7 +485,7 @@ static struct bpf_align_test tests[] = { /* Checked s>=0 */ {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, /* packet pointer + nonnegative (4n+2) */ - {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able @@ -530,11 +530,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@ -583,15 +583,15 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so -- cgit v1.2.3 From 417ec26477a5c19abc72dd0298f48ebe5d2db43a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:09:00 -0800 Subject: selftests/bpf: add offload test based on netdevsim Add a test of BPF offload control path interfaces based on just-added netdevsim driver. Perform various checks of both the stack and the expected driver behaviour. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 5 +- tools/testing/selftests/bpf/sample_ret0.c | 7 + tools/testing/selftests/bpf/test_offload.py | 681 ++++++++++++++++++++++++++++ 3 files changed, 691 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/sample_ret0.c create mode 100755 tools/testing/selftests/bpf/test_offload.py (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 333a48655ee0..2c9d8c63c6fa 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -17,9 +17,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ + test_offload.py include ../lib.mk diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c new file mode 100644 index 000000000000..fec99750d6ea --- /dev/null +++ b/tools/testing/selftests/bpf/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py new file mode 100755 index 000000000000..3914f7a4585a --- /dev/null +++ b/tools/testing/selftests/bpf/test_offload.py @@ -0,0 +1,681 @@ +#!/usr/bin/python3 + +# Copyright (C) 2017 Netronome Systems, Inc. +# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import json +import os +import pprint +import subprocess +import time + +logfile = None +log_level = 1 +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. + """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + log("FAIL: " + msg, "", level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + files.remove(f) + +def tool(name, args, flags, JSON=True, fail=True): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + ret, out = cmd(name + " " + params + args, fail=fail) + if JSON and len(out.strip()) != 0: + return ret, json.loads(out) + else: + return ret, out + +def bpftool(args, JSON=True, fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail) + +def bpftool_prog_list(expected=None): + _, progs = bpftool("prog show", JSON=True, fail=True) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def ip(args, force=False, JSON=True, fail=True): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail) + +def tc(args, JSON=True, fail=True): + return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec=".text", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. + """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + p = os.path.join(path, f) + if os.path.isfile(p): + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self): + self.dev = self._netdevsim_create() + devs.append(self) + + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def _netdevsim_create(self): + _, old = ip("link show") + ip("link add sim%d type netdevsim") + _, new = ip("link show") + + for dev in new: + f = filter(lambda x: x["ifname"] == dev["ifname"], old) + if len(list(f)) == 0: + return dev + + raise Exception("failed to create netdevsim device") + + def remove(self): + devs.remove(self) + ip("link del dev %s" % (self.dev["ifname"])) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, fail=True): + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, fail=fail) + + def unset_xdp(self, mode, force=False, fail=True): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, fail=fail) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % (self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False, + fail=True): + params = "" + if da: + params += " da" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + return tc("filter add dev %s ingress bpf %s %s" % + (self['ifname'], bpf, params), fail=fail) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + +def pin_prog(file_name, idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +# Check no BPF programs are loaded +skip(len(progs) != 0, "BPF programs already loaded on the system") + +# Check netdevsim +ret, out = cmd("modprobe netdevsim", fail=False) +skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +try: + obj = bpf_obj("sample_ret0.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + sim = NetdevSim() + sim.set_xdp(obj, "generic") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by default...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False) + fail(ret == 0, "TC bytecode loaded for offload") + sim.wait_for_flush() + + start_test("Test TC offloads work...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + + start_test("Test TC offload basics...") + dfs = sim.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace with bad flags...") + ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) + fail(ret == 0, "Replaced XDP program with a program in different mode") + ret, _ = sim.set_xdp(obj, "", force=True, fail=False) + fail(ret == 0, "Replaced XDP program with a program in different mode") + + start_test("Test XDP prog remove with bad flags...") + ret, _ = sim.unset_xdp("offload", force=True, fail=False) + fail(ret == 0, "Removed program with a bad mode mode") + ret, _ = sim.unset_xdp("", force=True, fail=False) + fail(ret == 0, "Removed program with a bad mode mode") + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test XDP offload...") + sim.set_xdp(obj, "offload") + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + + start_test("Test XDP offload is device bound...") + dfs = sim.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + sim2 = NetdevSim() + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _ = sim.set_xdp(pinned, "offload", fail=False) + fail(ret == 0, "Pinned program loaded for a different device accepted") + sim2.remove() + ret, _ = sim.set_xdp(pinned, "offload", fail=False) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret == 0, "Loading TC when XDP active should fail") + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _ = sim.set_xdp(obj, "offload", fail=False) + fail(ret == 0, "Loading XDP when TC active should fail") + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + # The above will trigger a splat until TC cls_bpf drivers are fixed + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for translation...") + delay_msec = 500 + sim.dfs["bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while sim.dfs_num_bound_progs() <= 2: + pass + sim.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() -- cgit v1.2.3 From 5783ee6ec3d3323a04cc69764d57cac7bf026332 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 21 Nov 2017 18:17:19 +1100 Subject: selftests/powerpc: Check for pthread errors in tm-unavailable Signed-off-by: Cyril Bur Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/tm/tm-unavailable.c | 43 +++++++++++++++++----- 1 file changed, 34 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 96c37f84ce54..e6a0fad2bfd0 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -15,6 +15,7 @@ */ #define _GNU_SOURCE +#include #include #include #include @@ -33,6 +34,11 @@ #define VSX_UNA_EXCEPTION 2 #define NUM_EXCEPTIONS 3 +#define err_at_line(status, errnum, format, ...) \ + error_at_line(status, errnum, __FILE__, __LINE__, format ##__VA_ARGS__) + +#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__) +#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__) struct Flags { int touch_fp; @@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) * checking if the failure cause is the one we expect. */ do { + int rc; + /* Bind 'ping' to CPU 0, as specified in 'attr'. */ - pthread_create(&t0, attr, ping, (void *) &flags); - pthread_setname_np(t0, "ping"); - pthread_join(t0, &ret_value); + rc = pthread_create(&t0, attr, ping, (void *) &flags); + if (rc) + pr_err(rc, "pthread_create()"); + rc = pthread_setname_np(t0, "ping"); + if (rc) + pr_warn(rc, "pthread_setname_np"); + rc = pthread_join(t0, &ret_value); + if (rc) + pr_err(rc, "pthread_join"); + retries--; } while (ret_value != NULL && retries); @@ -320,7 +335,7 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) int main(int argc, char **argv) { - int exception; /* FP = 0, VEC = 1, VSX = 2 */ + int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; @@ -330,13 +345,23 @@ int main(int argc, char **argv) CPU_SET(0, &cpuset); /* Init pthread attribute. */ - pthread_attr_init(&attr); + rc = pthread_attr_init(&attr); + if (rc) + pr_err(rc, "pthread_attr_init()"); /* Set CPU 0 mask into the pthread attribute. */ - pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); - - pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); - pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */ + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_err(rc, "pthread_attr_setaffinity_np()"); + + rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); + if (rc) + pr_err(rc, "pthread_create()"); + + /* Name it for systemtap convenience */ + rc = pthread_setname_np(t1, "pong"); + if (rc) + pr_warn(rc, "pthread_create()"); flags.result = 0; -- cgit v1.2.3 From cdd77d3e193031cc67426cd671d8aa370f7dfee4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 17 Nov 2017 16:23:08 -0800 Subject: nfit, libnvdimm: deprecate the generic SMART ioctl The kernel's ND_IOCTL_SMART_THRESHOLD command is based on a payload definition that has become broken / out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL definition. Deprecate the use of the ND_IOCTL_SMART_THRESHOLD command in favor of the ND_CMD_CALL approach taken by NVDIMM_FAMILY_{HPE,MSFT}, where we can manage the per-vendor variance in userspace. In a couple years, when the new scheme is widely deployed in userspace packages, the ND_IOCTL_SMART_THRESHOLD support can be removed. For now we prevent new binaries from compiling against the kernel header definitions, but kernel still compatible with old binaries. The libndctl.h [1] header is now the authoritative interface definition for NVDIMM SMART. [1]: https://github.com/pmem/ndctl Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 39 ++++++++++++++--------- tools/testing/nvdimm/test/nfit_test.h | 59 +++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 7217b2b953b5..640c02b08a50 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -440,39 +440,50 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, return 0; } -static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len) { - static const struct nd_smart_payload smart_data = { - .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID - | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID - | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, - .health = ND_SMART_NON_CRITICAL_HEALTH, - .temperature = 23 * 16, + static const struct nd_intel_smart smart_data = { + .flags = ND_INTEL_SMART_HEALTH_VALID + | ND_INTEL_SMART_SPARES_VALID + | ND_INTEL_SMART_ALARM_VALID + | ND_INTEL_SMART_USED_VALID + | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_MTEMP_VALID, + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, + .media_temperature = 23 * 16, + .ctrl_temperature = 30 * 16, + .pmic_temperature = 40 * 16, .spares = 75, - .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .ait_status = 1, .life_used = 5, .shutdown_state = 0, .vendor_size = 0, + .shutdown_count = 100, }; if (buf_len < sizeof(*smart)) return -EINVAL; - memcpy(smart->data, &smart_data, sizeof(smart_data)); + memcpy(smart, &smart_data, sizeof(smart_data)); return 0; } -static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, +static int nfit_test_cmd_smart_threshold( + struct nd_intel_smart_threshold *smart_t, unsigned int buf_len) { - static const struct nd_smart_threshold_payload smart_t_data = { - .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .temperature = 40 * 16, + static const struct nd_intel_smart_threshold smart_t_data = { + .alarm_control = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .media_temperature = 40 * 16, + .ctrl_temperature = 30 * 16, .spares = 5, }; if (buf_len < sizeof(*smart_t)) return -EINVAL; - memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + memcpy(smart_t, &smart_t_data, sizeof(smart_t_data)); return 0; } diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 113b44675a71..b85fba2856c7 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -84,6 +84,65 @@ struct nd_cmd_ars_err_inj_stat { } __packed record[0]; } __packed; +#define ND_INTEL_SMART 1 +#define ND_INTEL_SMART_THRESHOLD 2 + +#define ND_INTEL_SMART_HEALTH_VALID (1 << 0) +#define ND_INTEL_SMART_SPARES_VALID (1 << 1) +#define ND_INTEL_SMART_USED_VALID (1 << 2) +#define ND_INTEL_SMART_MTEMP_VALID (1 << 3) +#define ND_INTEL_SMART_CTEMP_VALID (1 << 4) +#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) +#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6) +#define ND_INTEL_SMART_PTEMP_VALID (1 << 7) +#define ND_INTEL_SMART_ALARM_VALID (1 << 9) +#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) +#define ND_INTEL_SMART_VENDOR_VALID (1 << 11) +#define ND_INTEL_SMART_SPARE_TRIP (1 << 0) +#define ND_INTEL_SMART_TEMP_TRIP (1 << 1) +#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2) +#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) + +struct nd_intel_smart { + __u32 status; + union { + struct { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u8 spares; + __u8 life_used; + __u8 alarm_flags; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 shutdown_count; + __u8 ait_status; + __u16 pmic_temperature; + __u8 reserved1[8]; + __u8 shutdown_state; + __u32 vendor_size; + __u8 vendor_data[92]; + } __packed; + __u8 data[128]; + }; +} __packed; + +struct nd_intel_smart_threshold { + __u32 status; + union { + struct { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u8 reserved[1]; + } __packed; + __u8 data[8]; + }; +} __packed; + union acpi_object; typedef void *acpi_handle; -- cgit v1.2.3 From ed07c4338dd5ceadb5ffed0a5be03488ac54f5d2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Nov 2017 14:32:27 -0800 Subject: tools/testing/nvdimm: smart alarm/threshold control Allow the smart_threshold values to be changed via the 'set smart threshold command' and trigger notifications when the thresholds are met. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 157 ++++++++++++++++++++++++---------- tools/testing/nvdimm/test/nfit_test.h | 9 ++ 2 files changed, 122 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 640c02b08a50..2b57254342aa 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -168,6 +168,8 @@ struct nfit_test { spinlock_t lock; } ars_state; struct device *dimm_dev[NUM_DCR]; + struct nd_intel_smart *smart; + struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; struct work_struct work; }; @@ -440,50 +442,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, return 0; } -static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len) +static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len, + struct nd_intel_smart *smart_data) { - static const struct nd_intel_smart smart_data = { - .flags = ND_INTEL_SMART_HEALTH_VALID - | ND_INTEL_SMART_SPARES_VALID - | ND_INTEL_SMART_ALARM_VALID - | ND_INTEL_SMART_USED_VALID - | ND_INTEL_SMART_SHUTDOWN_VALID - | ND_INTEL_SMART_MTEMP_VALID, - .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, - .media_temperature = 23 * 16, - .ctrl_temperature = 30 * 16, - .pmic_temperature = 40 * 16, - .spares = 75, - .alarm_flags = ND_INTEL_SMART_SPARE_TRIP - | ND_INTEL_SMART_TEMP_TRIP, - .ait_status = 1, - .life_used = 5, - .shutdown_state = 0, - .vendor_size = 0, - .shutdown_count = 100, - }; - if (buf_len < sizeof(*smart)) return -EINVAL; - memcpy(smart, &smart_data, sizeof(smart_data)); + memcpy(smart, smart_data, sizeof(*smart)); return 0; } static int nfit_test_cmd_smart_threshold( - struct nd_intel_smart_threshold *smart_t, - unsigned int buf_len) + struct nd_intel_smart_threshold *out, + unsigned int buf_len, + struct nd_intel_smart_threshold *smart_t) { - static const struct nd_intel_smart_threshold smart_t_data = { - .alarm_control = ND_INTEL_SMART_SPARE_TRIP - | ND_INTEL_SMART_TEMP_TRIP, - .media_temperature = 40 * 16, - .ctrl_temperature = 30 * 16, - .spares = 5, - }; - if (buf_len < sizeof(*smart_t)) return -EINVAL; - memcpy(smart_t, &smart_t_data, sizeof(smart_t_data)); + memcpy(out, smart_t, sizeof(*smart_t)); + return 0; +} + +static void smart_notify(struct device *bus_dev, + struct device *dimm_dev, struct nd_intel_smart *smart, + struct nd_intel_smart_threshold *thresh) +{ + dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n", + __func__, thresh->alarm_control, thresh->spares, + smart->spares, thresh->media_temperature, + smart->media_temperature, thresh->ctrl_temperature, + smart->ctrl_temperature); + if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP) + && smart->spares + <= thresh->spares) + || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP) + && smart->media_temperature + >= thresh->media_temperature) + || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) + && smart->ctrl_temperature + >= thresh->ctrl_temperature)) { + device_lock(bus_dev); + __acpi_nvdimm_notify(dimm_dev, 0x81); + device_unlock(bus_dev); + } +} + +static int nfit_test_cmd_smart_set_threshold( + struct nd_intel_smart_set_threshold *in, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + unsigned int size; + + size = sizeof(*in) - 4; + if (buf_len < size) + return -EINVAL; + memcpy(thresh->data, in, size); + in->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + return 0; } @@ -608,7 +626,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; - /* lookup label space for the given dimm */ + /* lookup per-dimm data */ for (i = 0; i < ARRAY_SIZE(handle); i++) if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) @@ -631,14 +649,19 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i - t->dcr_idx]); break; - case ND_CMD_SMART: - rc = nfit_test_cmd_smart(buf, buf_len); + case ND_INTEL_SMART: + rc = nfit_test_cmd_smart(buf, buf_len, + &t->smart[i - t->dcr_idx]); + break; + case ND_INTEL_SMART_THRESHOLD: + rc = nfit_test_cmd_smart_threshold(buf, buf_len, + &t->smart_threshold[i - t->dcr_idx]); break; - case ND_CMD_SMART_THRESHOLD: - rc = nfit_test_cmd_smart_threshold(buf, buf_len); - device_lock(&t->pdev.dev); - __acpi_nvdimm_notify(t->dimm_dev[i], 0x81); - device_unlock(&t->pdev.dev); + case ND_INTEL_SMART_SET_THRESHOLD: + rc = nfit_test_cmd_smart_set_threshold(buf, buf_len, + &t->smart_threshold[i - t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); break; default: return -ENOTTY; @@ -883,6 +906,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static void smart_init(struct nfit_test *t) +{ + int i; + const struct nd_intel_smart_threshold smart_t_data = { + .alarm_control = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .media_temperature = 40 * 16, + .ctrl_temperature = 30 * 16, + .spares = 5, + }; + const struct nd_intel_smart smart_data = { + .flags = ND_INTEL_SMART_HEALTH_VALID + | ND_INTEL_SMART_SPARES_VALID + | ND_INTEL_SMART_ALARM_VALID + | ND_INTEL_SMART_USED_VALID + | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_MTEMP_VALID, + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, + .media_temperature = 23 * 16, + .ctrl_temperature = 30 * 16, + .pmic_temperature = 40 * 16, + .spares = 75, + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .ait_status = 1, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + .shutdown_count = 100, + }; + + for (i = 0; i < t->num_dcr; i++) { + memcpy(&t->smart[i], &smart_data, sizeof(smart_data)); + memcpy(&t->smart_threshold[i], &smart_t_data, + sizeof(smart_t_data)); + } +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA @@ -950,6 +1011,7 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -980,6 +1042,7 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1653,13 +1716,14 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); @@ -2065,6 +2129,11 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(struct nfit_test_dcr *), GFP_KERNEL); nfit_test->dcr_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->smart = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart), GFP_KERNEL); + nfit_test->smart_threshold = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart_threshold), + GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr && nfit_test->dcr_dma && nfit_test->flush diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index b85fba2856c7..ba230f6f7676 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -86,6 +86,7 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART 1 #define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_SMART_SET_THRESHOLD 17 #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) #define ND_INTEL_SMART_SPARES_VALID (1 << 1) @@ -143,6 +144,14 @@ struct nd_intel_smart_threshold { }; } __packed; +struct nd_intel_smart_set_threshold { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 status; +} __packed; + union acpi_object; typedef void *acpi_handle; -- cgit v1.2.3 From 77be4c878c72e411ad22af96b6f81dd45c26450a Mon Sep 17 00:00:00 2001 From: Julien BOIBESSOT Date: Tue, 5 Dec 2017 18:48:14 +0100 Subject: tools/usbip: fixes build with musl libc toolchain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Indeed musl doesn't define old SIGCLD signal name but only new one SIGCHLD. SIGCHLD is the new POSIX name for that signal so it doesn't change anything on other libcs. This fixes this kind of build error: usbipd.c: In function ‘set_signal’: usbipd.c:459:12: error: 'SIGCLD' undeclared (first use in this function) sigaction(SIGCLD, &act, NULL); ^~~~~~ usbipd.c:459:12: note: each undeclared identifier is reported only once for each function it appears in Makefile:407: recipe for target 'usbipd.o' failed make[3]: *** [usbipd.o] Error 1 Signed-off-by: Julien BOIBESSOT Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbipd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c index 009afb4a3aae..c6dad2a13c80 100644 --- a/tools/usb/usbip/src/usbipd.c +++ b/tools/usb/usbip/src/usbipd.c @@ -456,7 +456,7 @@ static void set_signal(void) sigaction(SIGTERM, &act, NULL); sigaction(SIGINT, &act, NULL); act.sa_handler = SIG_IGN; - sigaction(SIGCLD, &act, NULL); + sigaction(SIGCHLD, &act, NULL); } static const char *pid_file; -- cgit v1.2.3 From 658e85aa4ff2951f1e5163767827eaffccd51067 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Dec 2017 15:00:17 -0800 Subject: tools: bpftool: harmonise Makefile and Documentation/Makefile Several minor fixes and harmonisation items for Makefiles: * Use the same mechanism for verbose/non-verbose output in two files ("$(Q)"), for all commands. * Use calls to "QUIET_INSTALL" and equivalent in Makefile. In particular, use "call(descend, ...)" instead of "make -C" to run documentation targets. * Add a "doc-clean" target, aligned on "doc" and "doc-install". * Make "install" target in Makefile depend on "bpftool". * Remove condition on DESTDIR to initialise prefix in doc Makefile. * Remove modification of VPATH based on OUTPUT, it is unused. * Formatting: harmonise spaces around equal signs. * Make install path for man pages /usr/local/man instead of /usr/local/share/man (respects the Makefile conventions, and the latter is usually a symbolic link to the former anyway). * Do not erase prefix if set by user in bpftool Makefile. * Fix install target for bpftool: append DESTDIR to install path. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 22 +++++++------ tools/bpf/bpftool/Makefile | 53 ++++++++++++++------------------ 2 files changed, 36 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 37292bb5ce60..71c17fab4f2f 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -4,11 +4,14 @@ include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix ?= /usr/local +ifeq ($(V),1) + Q = +else + Q = @ endif -mandir ?= $(prefix)/share/man + +prefix ?= /usr/local +mandir ?= $(prefix)/man man8dir = $(mandir)/man8 MAN8_RST = $(wildcard *.rst) @@ -20,15 +23,16 @@ man: man8 man8: $(DOC_MAN8) $(OUTPUT)%.8: %.rst - rst2man $< > $@ + $(QUIET_GEN)rst2man $< > $@ clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + $(call QUIET_CLEAN, Documentation) + $(Q)$(RM) $(DOC_MAN8) install: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ - $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + $(call QUIET_INSTALL, Documentation-man) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) + $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) .PHONY: man man8 clean install .DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ec3052c0b004..203ae2e14fbc 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,25 +1,10 @@ include ../../scripts/Makefile.include - include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -ifneq ($(objtree),) -#$(info Determined 'objtree' to be $(objtree)) -endif - -ifneq ($(OUTPUT),) -#$(info Determined 'OUTPUT' to be $(OUTPUT)) -# Adding $(OUTPUT) as a directory to look for source files, -# because use generated output files as sources dependency -# for flex/bison parsers. -VPATH += $(OUTPUT) -export VPATH endif ifeq ($(V),1) @@ -28,12 +13,12 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - BPF_PATH=$(OUTPUT) + BPF_PATH = $(OUTPUT) else - BPF_PATH=$(BPF_DIR) + BPF_PATH = $(BPF_DIR) endif LIBBPF = $(BPF_PATH)libbpf.a @@ -45,7 +30,7 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr/local +prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CC = gcc @@ -55,12 +40,15 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +INSTALL ?= install +RM ?= rm -f + include $(wildcard *.d) all: $(OUTPUT)bpftool -SRCS=$(wildcard *.c) -OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -73,21 +61,26 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) - $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d -install: - install -m 0755 -d $(prefix)/sbin - install $(OUTPUT)bpftool $(prefix)/sbin/bpftool - install -m 0755 -d $(bash_compdir) - install -m 0644 bash-completion/bpftool $(bash_compdir) +install: $(OUTPUT)bpftool + $(call QUIET_INSTALL, bpftool) + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin + $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) + $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) doc: - $(Q)$(MAKE) -C Documentation/ + $(call descend,Documentation) + +doc-clean: + $(call descend,Documentation,clean) doc-install: - $(Q)$(MAKE) -C Documentation/ install + $(call descend,Documentation,install) FORCE: -.PHONY: all clean FORCE install doc doc-install +.PHONY: all FORCE clean install +.PHONY: doc doc-clean doc-install .DEFAULT_GOAL := all -- cgit v1.2.3 From d32442485df7633fc67245e3e614d29ac3c45dbd Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 7 Dec 2017 15:00:18 -0800 Subject: tools: bpftool: create "uninstall", "doc-uninstall" make targets Create two targets to remove executable and documentation that would have been previously installed with `make install` and `make doc-install`. Also create a "QUIET_UNINST" helper in tools/scripts/Makefile.include. Do not attempt to remove directories /usr/local/sbin and /usr/share/bash-completions/completions, even if they are empty, as those specific directories probably already existed on the system before we installed the program, and we do not wish to break other makefiles that might assume their existence. Do remvoe /usr/local/share/man/man8 if empty however, as this directory does not seem to exist by default. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 8 +++++++- tools/bpf/bpftool/Makefile | 12 ++++++++++-- tools/scripts/Makefile.include | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 71c17fab4f2f..c462a928e03d 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -3,6 +3,7 @@ include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty ifeq ($(V),1) Q = @@ -34,5 +35,10 @@ install: man $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) -.PHONY: man man8 clean install +uninstall: + $(call QUIET_UNINST, Documentation-man) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8)) + $(Q)$(RMDIR) $(DESTDIR)$(man8dir) + +.PHONY: man man8 clean install uninstall .DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 203ae2e14fbc..3f17ad317512 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -70,6 +70,11 @@ install: $(OUTPUT)bpftool $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) +uninstall: + $(call QUIET_UNINST, bpftool) + $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool + doc: $(call descend,Documentation) @@ -79,8 +84,11 @@ doc-clean: doc-install: $(call descend,Documentation,install) +doc-uninstall: + $(call descend,Documentation,uninstall) + FORCE: -.PHONY: all FORCE clean install -.PHONY: doc doc-clean doc-install +.PHONY: all FORCE clean install uninstall +.PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 3fab179b1aba..fcb3ed0be5f8 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -99,5 +99,6 @@ ifneq ($(silent),1) QUIET_CLEAN = @printf ' CLEAN %s\n' $1; QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif -- cgit v1.2.3 From f36dbfe1a504b85c7b3bf89fdd99991afbaa0f74 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Fri, 1 Sep 2017 10:17:14 +0800 Subject: selftests/powerpc: Fix build errors in powerpc ptrace selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 7 will take "r2" in clobber list as an error and it will get following build errors for powerpc ptrace selftests even with -fno-pic option: ptrace-tm-vsx.c: In function ‘tm_vsx’: ptrace-tm-vsx.c:42:2: error: PIC register clobbered by ‘r2’ in ‘asm’ asm __volatile__( ^~~ make[1]: *** [ptrace-tm-vsx] Error 1 ptrace-tm-spd-vsx.c: In function ‘tm_spd_vsx’: ptrace-tm-spd-vsx.c:55:2: error: PIC register clobbered by ‘r2’ in ‘asm’ asm __volatile__( ^~~ make[1]: *** [ptrace-tm-spd-vsx] Error 1 ptrace-tm-spr.c: In function ‘tm_spr’: ptrace-tm-spr.c:46:2: error: PIC register clobbered by ‘r2’ in ‘asm’ asm __volatile__( ^~~ Fix the build error by removing "r2" from the clobber list. None of these asm blocks actually clobber r2. Reported-by: Seth Forshee Signed-off-by: Simon Guo Tested-by: Seth Forshee Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c | 3 +-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 0df3c23b7888..277dade1b382 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -79,8 +79,8 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11" + : "memory", "r0", "r1", "r3", "r4", + "r7", "r8", "r9", "r10", "r11" ); if (result) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 94e57cb89769..51427a2465f6 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -76,8 +76,7 @@ trans: : [tfhar] "=r" (tfhar), [res] "=r" (result), [texasr] "=r" (texasr), [cptr1] "=r" (cptr1) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11", "r31" + : "memory", "r0", "r8", "r31" ); /* There are 2 32bit instructions before tbegin. */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index b4081e2b22d5..17c23cabac3e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -67,7 +67,7 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "r" (&cptr[1]) - : "memory", "r0", "r1", "r2", "r3", "r4", + : "memory", "r0", "r1", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); -- cgit v1.2.3 From 63060c39161d3d61c771dee20a3cbdffaf83f1df Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Tue, 12 Dec 2017 00:55:23 +0530 Subject: selftests: bpf: Adding config fragment CONFIG_CGROUP_BPF=y CONFIG_CGROUP_BPF=y is required for test_dev_cgroup test case. Signed-off-by: Naresh Kamboju Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 52d53ed08769..9d4897317c77 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -3,3 +3,4 @@ CONFIG_BPF_SYSCALL=y CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m +CONFIG_CGROUP_BPF=y -- cgit v1.2.3 From d279f1f8c64711ca986c3121c8ec811b892932f0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Dec 2017 11:39:03 -0800 Subject: bpf/tracing: add a bpf test for new ioctl query interface Added a subtest in test_progs. The tracepoint is sched/sched_switch. Multiple bpf programs are attached to this tracepoint and the query interface is exercised. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/perf_event.h | 22 +++++ tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 133 ++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_tracepoint.c | 26 +++++ 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_tracepoint.c (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 362493a2f950..f2c354d5f519 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2c9d8c63c6fa..255fb1f50f6b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -17,7 +17,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 69427531408d..1d7d2149163a 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -21,8 +21,10 @@ typedef __u16 __sum16; #include #include #include +#include #include +#include #include #include #include @@ -617,6 +619,136 @@ static void test_obj_name(void) } } +static void test_tp_attach_query(void) +{ + const int num_progs = 3; + int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; + __u32 duration = 0, info_len, saved_prog_ids[num_progs]; + const char *file = "./test_tracepoint.o"; + struct perf_event_query_bpf *query; + struct perf_event_attr attr = {}; + struct bpf_object *obj[num_progs]; + struct bpf_prog_info prog_info; + char buf[256]; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + return; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + return; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; + + bzero(&prog_info, sizeof(prog_info)); + prog_info.jited_prog_len = 0; + prog_info.xlated_prog_len = 0; + prog_info.nr_map_ids = 0; + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); + if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err, errno)) + goto cleanup1; + saved_prog_ids[i] = prog_info.id; + + pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd[i], errno)) + goto cleanup2; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 0) { + /* check NULL prog array query */ + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 0, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 1) { + /* try to get # of programs only */ + query->ids_len = 0; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + + /* try a few negative tests */ + /* invalid query pointer */ + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, + (struct perf_event_query_bpf *)0x1); + if (CHECK(!err || errno != EFAULT, + "perf_event_ioc_query_bpf", + "err %d errno %d\n", err, errno)) + goto cleanup3; + + /* no enough space */ + query->ids_len = 1; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != (i + 1), + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + for (j = 0; j < i + 1; j++) + if (CHECK(saved_prog_ids[j] != query->ids[j], + "perf_event_ioc_query_bpf", + "#%d saved_prog_id %x query prog_id %x\n", + j, saved_prog_ids[j], query->ids[j])) + goto cleanup3; + } + + i = num_progs - 1; + for (; i >= 0; i--) { + cleanup3: + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); + cleanup2: + close(pmu_fd[i]); + cleanup1: + bpf_object__close(obj[i]); + } + free(query); +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -630,6 +762,7 @@ int main(void) test_bpf_obj_id(); test_pkt_md_access(); test_obj_name(); + test_tp_attach_query(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c new file mode 100644 index 000000000000..04bf084517e0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ -- cgit v1.2.3 From 965de87e54b803223bff703ea6b2a76c056695ae Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Dec 2017 11:36:49 -0500 Subject: samples/bpf: add a test for bpf_override_return This adds a basic test for bpf_override_return to verify it works. We override the main function for mounting a btrfs fs so it'll return -ENOMEM and then make sure that trying to mount a btrfs fs will fail. Acked-by: Alexei Starovoitov Acked-by: Ingo Molnar Signed-off-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 7 ++++++- tools/testing/selftests/bpf/bpf_helpers.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab30293..cf446c25c0ec 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..33cb00e46c49 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 583c90097f7271ab90f149b52b9ac2098bf2cbb5 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 13 Dec 2017 15:18:51 +0000 Subject: libbpf: add ability to guess program type based on section name The bpf_prog_load() function will guess program type if it's not specified explicitly. This functionality will be used to implement loading of different programs without asking a user to specify the program type. In first order it will be used by bpftool. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Martin KaFai Lau Cc: Quentin Monnet Cc: David Ahern Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5aa45f89da93..205b7822fa0a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1721,6 +1721,45 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +#define BPF_PROG_SEC(string, type) { string, sizeof(string), type } +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), + BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), + BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), +}; +#undef BPF_PROG_SEC + +static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) +{ + int i; + + if (!prog->section_name) + goto err; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) + if (strncmp(prog->section_name, section_names[i].sec, + section_names[i].len) == 0) + return section_names[i].prog_type; + +err: + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); + + return BPF_PROG_TYPE_UNSPEC; +} + int bpf_map__fd(struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -1832,6 +1871,18 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, return -ENOENT; } + /* + * If type is not specified, try to guess it based on + * section name. + */ + if (type == BPF_PROG_TYPE_UNSPEC) { + type = bpf_program__guess_type(prog); + if (type == BPF_PROG_TYPE_UNSPEC) { + bpf_object__close(obj); + return -EINVAL; + } + } + bpf_program__set_type(prog, type); err = bpf_object__load(obj); if (err) { -- cgit v1.2.3 From fe4d44b23f6b38194a92c6b8a50d921a071c4db4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 13 Dec 2017 15:18:52 +0000 Subject: libbpf: prefer global symbols as bpf program name source Libbpf picks the name of the first symbol in the corresponding elf section to use as a program name. But without taking symbol's scope into account it may end's up with some local label as a program name. E.g.: $ bpftool prog 1: type 15 name LBB0_10 tag 0390a5136ba23f5c loaded_at Dec 07/17:22 uid 0 xlated 456B not jited memlock 4096B Fix this by preferring global symbols as program name. For instance: $ bpftool prog 1: type 15 name bpf_prog1 tag 0390a5136ba23f5c loaded_at Dec 07/17:26 uid 0 xlated 456B not jited memlock 4096B Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Martin KaFai Lau Cc: Quentin Monnet Cc: David Ahern Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 205b7822fa0a..65d0d0aff4fa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -387,6 +387,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) continue; if (sym.st_shndx != prog->idx) continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -- cgit v1.2.3 From 49a086c201a9356287471aa5846a427bdcecc4f7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 13 Dec 2017 15:18:53 +0000 Subject: bpftool: implement prog load command Add the prog load command to load a bpf program from a specified binary file and pin it to bpffs. Usage description and examples are given in the corresponding man page. Syntax: $ bpftool prog load OBJ FILE FILE is a non-existing file on bpffs. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Reviewed-by: Jakub Kicinski Cc: Martin KaFai Lau Cc: Quentin Monnet Cc: David Ahern Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 10 +++- tools/bpf/bpftool/Documentation/bpftool.rst | 2 +- tools/bpf/bpftool/common.c | 71 +++++++++++++----------- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/prog.c | 29 +++++++++- 5 files changed, 79 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 36e8d1c3c40d..ffdb20e8280f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,7 +15,7 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump xlated** | **dump jited** | **pin** | **help** } + { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } MAP COMMANDS ============= @@ -24,6 +24,7 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog load** *OBJ* *FILE* | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } @@ -57,6 +58,11 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. + **bpftool prog load** *OBJ* *FILE* + Load bpf program from binary *OBJ* and pin as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + **bpftool prog help** Print short help message. @@ -126,8 +132,10 @@ EXAMPLES | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** +| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** | -rw------- 1 root root 0 Jul 22 01:43 prog +| -rw------- 1 root root 0 Jul 22 01:44 prog2 **# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 926c03d5a8da..f547a0c0aa34 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -26,7 +26,7 @@ SYNOPSIS | **pin** | **help** } *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** - | **help** } + | **load** | **help** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 2bd3b280e6dd..b62c94e3997a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -163,13 +163,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_fd(int fd, const char *name) { char err_str[ERR_MAX_LEN]; - unsigned int id; - char *endptr; char *file; char *dir; + int err = 0; + + err = bpf_obj_pin(fd, name); + if (!err) + goto out; + + file = malloc(strlen(name) + 1); + strcpy(file, name); + dir = dirname(file); + + if (errno != EPERM || is_bpffs(dir)) { + p_err("can't pin the object (%s): %s", name, strerror(errno)); + goto out_free; + } + + /* Attempt to mount bpffs, then retry pinning. */ + err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + if (!err) { + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, + strerror(errno)); + } else { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system to pin the object (%s): %s", + name, err_str); + } + +out_free: + free(file); +out: + return err; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; int err; int fd; @@ -195,35 +231,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) return -1; } - err = bpf_obj_pin(fd, *argv); - if (!err) - goto out_close; - - file = malloc(strlen(*argv) + 1); - strcpy(file, *argv); - dir = dirname(file); - - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", *argv, strerror(errno)); - goto out_free; - } + err = do_pin_fd(fd, *argv); - /* Attempt to mount bpffs, then retry pinning. */ - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, *argv); - if (err) - p_err("can't pin the object (%s): %s", *argv, - strerror(errno)); - } else { - err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't mount BPF file system to pin the object (%s): %s", - *argv, err_str); - } - -out_free: - free(file); -out_close: close(fd); return err; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index bff330b49791..bec1ccbb49c7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -111,6 +111,7 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ad619b96c276..037484ceaeaf 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -45,6 +45,7 @@ #include #include +#include #include "main.h" #include "disasm.h" @@ -635,6 +636,30 @@ static int do_pin(int argc, char **argv) return err; } +static int do_load(int argc, char **argv) +{ + struct bpf_object *obj; + int prog_fd; + + if (argc != 2) + usage(); + + if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + p_err("failed to load program\n"); + return -1; + } + + if (do_pin_fd(prog_fd, argv[1])) { + p_err("failed to pin program\n"); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -647,13 +672,14 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" + " %s %s load OBJ FILE\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -663,6 +689,7 @@ static const struct cmd cmds[] = { { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, + { "load", do_load }, { 0 } }; -- cgit v1.2.3 From 5ccda64d38cc2800e3c7fab42a2fea46d44693e9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 13 Dec 2017 15:18:54 +0000 Subject: bpftool: implement cgroup bpf operations This patch adds basic cgroup bpf operations to bpftool: cgroup list, attach and detach commands. Usage is described in the corresponding man pages, and examples are provided. Syntax: $ bpftool cgroup list CGROUP $ bpftool cgroup attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS] $ bpftool cgroup detach CGROUP ATTACH_TYPE PROG Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Martin KaFai Lau Cc: Quentin Monnet Reviewed-by: David Ahern Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 118 ++++++++ tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- tools/bpf/bpftool/Documentation/bpftool.rst | 6 +- tools/bpf/bpftool/cgroup.c | 307 +++++++++++++++++++++ tools/bpf/bpftool/main.c | 3 +- tools/bpf/bpftool/main.h | 1 + 7 files changed, 434 insertions(+), 5 deletions(-) create mode 100644 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst create mode 100644 tools/bpf/bpftool/cgroup.c (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst new file mode 100644 index 000000000000..45c71b1f682b --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -0,0 +1,118 @@ +================ +bpftool-cgroup +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + + *COMMANDS* := + { **list** | **attach** | **detach** | **help** } + +MAP COMMANDS +============= + +| **bpftool** **cgroup list** *CGROUP* +| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] +| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* +| **bpftool** **cgroup help** +| +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* } +| *ATTACH_FLAGS* := { *multi* | *override* } + +DESCRIPTION +=========== + **bpftool cgroup list** *CGROUP* + List all programs attached to the cgroup *CGROUP*. + + Output will start with program ID followed by attach type, + attach flags and program name. + + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] + Attach program *PROG* to the cgroup *CGROUP* with attach type + *ATTACH_TYPE* and optional *ATTACH_FLAGS*. + + *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs + some bpf program, the program in this cgroup yields to sub-cgroup + program; **multi** if a sub-cgroup installs some bpf program, + that cgroup program gets run in addition to the program in this + cgroup. + + Only one program is allowed to be attached to a cgroup with + no attach flags or the **override** flag. Attaching another + program will release old program and attach the new one. + + Multiple programs are allowed to be attached to a cgroup with + **multi**. They are executed in FIFO order (those that were + attached first, run first). + + Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 + and later. + + *ATTACH_TYPE* can be on of: + **ingress** ingress path of the inet socket (since 4.10); + **egress** egress path of the inet socket (since 4.10); + **sock_create** opening of an inet socket (since 4.10); + **sock_ops** various socket operations (since 4.12); + **device** device access (since 4.15). + + **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* + Detach *PROG* from the cgroup *CGROUP* and attach type + *ATTACH_TYPE*. + + **bpftool prog help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -f, --bpffs + Show file names of pinned programs. + +EXAMPLES +======== +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# mkdir /sys/fs/cgroup/test.slice** +| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog** +| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi** + +**# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + 1 device allow_multi bpf_prog1 + +| +| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1** +| **# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 9f51a268eb06..421cabc417e6 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -128,4 +128,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8) + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index ffdb20e8280f..81c97c0e9b67 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -155,4 +155,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8) + **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index f547a0c0aa34..6732a5a617e4 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** } + *OBJECT* := { **map** | **program** | **cgroup** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -28,6 +28,8 @@ SYNOPSIS *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** | **load** | **help** } + *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -53,4 +55,4 @@ OPTIONS SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8) + **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c new file mode 100644 index 000000000000..34ca303d72bc --- /dev/null +++ b/tools/bpf/bpftool/cgroup.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2017 Facebook +// Author: Roman Gushchin + +#include +#include +#include +#include +#include +#include + +#include + +#include "main.h" + +#define HELP_SPEC_ATTACH_FLAGS \ + "ATTACH_FLAGS := { multi | override }" + +#define HELP_SPEC_ATTACH_TYPES \ + "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" + +static const char * const attach_type_strings[] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [__MAX_BPF_ATTACH_TYPE] = NULL, +}; + +static enum bpf_attach_type parse_attach_type(const char *str) +{ + enum bpf_attach_type type; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return __MAX_BPF_ATTACH_TYPE; +} + +static int list_bpf_prog(int id, const char *attach_type_str, + const char *attach_flags_str) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int prog_fd; + + prog_fd = bpf_prog_get_fd_by_id(id); + if (prog_fd < 0) + return -1; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + close(prog_fd); + return -1; + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "id", info.id); + jsonw_string_field(json_wtr, "attach_type", + attach_type_str); + jsonw_string_field(json_wtr, "attach_flags", + attach_flags_str); + jsonw_string_field(json_wtr, "name", info.name); + jsonw_end_object(json_wtr); + } else { + printf("%-8u %-15s %-15s %-15s\n", info.id, + attach_type_str, + attach_flags_str, + info.name); + } + + close(prog_fd); + return 0; +} + +static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +{ + __u32 prog_ids[1024] = {0}; + char *attach_flags_str; + __u32 prog_cnt, iter; + __u32 attach_flags; + char buf[32]; + int ret; + + prog_cnt = ARRAY_SIZE(prog_ids); + ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids, + &prog_cnt); + if (ret) + return ret; + + if (prog_cnt == 0) + return 0; + + switch (attach_flags) { + case BPF_F_ALLOW_MULTI: + attach_flags_str = "multi"; + break; + case BPF_F_ALLOW_OVERRIDE: + attach_flags_str = "override"; + break; + case 0: + attach_flags_str = ""; + break; + default: + snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); + attach_flags_str = buf; + } + + for (iter = 0; iter < prog_cnt; iter++) + list_bpf_prog(prog_ids[iter], attach_type_strings[type], + attach_flags_str); + + return 0; +} + +static int do_list(int argc, char **argv) +{ + enum bpf_attach_type type; + int cgroup_fd; + int ret = -1; + + if (argc < 1) { + p_err("too few parameters for cgroup list\n"); + goto exit; + } else if (argc > 1) { + p_err("too many parameters for cgroup list\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + if (json_output) + jsonw_start_array(json_wtr); + else + printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", + "AttachFlags", "Name"); + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + /* + * Not all attach types may be supported, so it's expected, + * that some requests will fail. + * If we were able to get the list for at least one + * attach type, let's return 0. + */ + if (list_attached_bpf_progs(cgroup_fd, type) == 0) + ret = 0; + } + + if (json_output) + jsonw_end_array(json_wtr); + + close(cgroup_fd); +exit: + return ret; +} + +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int cgroup_fd, prog_fd; + int attach_flags = 0; + int ret = -1; + int i; + + if (argc < 4) { + p_err("too few parameters for cgroup attach\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type\n"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + for (i = 0; i < argc; i++) { + if (is_prefix(argv[i], "multi")) { + attach_flags |= BPF_F_ALLOW_MULTI; + } else if (is_prefix(argv[i], "override")) { + attach_flags |= BPF_F_ALLOW_OVERRIDE; + } else { + p_err("unknown option: %s\n", argv[i]); + goto exit_cgroup; + } + } + + if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) { + p_err("failed to attach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_detach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int prog_fd, cgroup_fd; + int ret = -1; + + if (argc < 4) { + p_err("too few parameters for cgroup detach\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) { + p_err("failed to detach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s list CGROUP\n" + " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" + " %s %s detach CGROUP ATTACH_TYPE PROG\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_ATTACH_TYPES "\n" + " " HELP_SPEC_ATTACH_FLAGS "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "list", do_list }, + { "attach", do_attach }, + { "detach", do_detach }, + { "help", do_help }, + { 0 } +}; + +int do_cgroup(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d294bc8168be..ecd53ccf1239 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -85,7 +85,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map }\n" + " OBJECT := { prog | map | cgroup }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -173,6 +173,7 @@ static const struct cmd cmds[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "cgroup", do_cgroup }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index bec1ccbb49c7..8f6d3cac0347 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -115,6 +115,7 @@ int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); +int do_cgroup(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); -- cgit v1.2.3 From a7ff3eca95a5f9bc24132b5975f40dac10710ed1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:07 -0800 Subject: selftests/bpf: add verifier tests for bpf_call Add extensive set of tests for bpf_call verification logic: calls: basic sanity calls: using r0 returned by callee calls: callee is using r1 calls: callee using args1 calls: callee using wrong args2 calls: callee using two args calls: callee changing pkt pointers calls: two calls with args calls: two calls with bad jump calls: recursive call. test1 calls: recursive call. test2 calls: unreachable code calls: invalid call calls: jumping across function bodies. test1 calls: jumping across function bodies. test2 calls: call without exit calls: call into middle of ld_imm64 calls: call into middle of other call calls: two calls with bad fallthrough calls: two calls with stack read calls: two calls with stack write calls: spill into caller stack frame calls: two calls with stack write and void return calls: ambiguous return value calls: two calls that return map_value calls: two calls that return map_value with bool condition calls: two calls that return map_value with incorrect bool check calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1 calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2 calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3 calls: two calls that receive map_value_ptr_or_null via arg. test1 calls: two calls that receive map_value_ptr_or_null via arg. test2 calls: pkt_ptr spill into caller stack Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 961 +++++++++++++++++++++++++++- 1 file changed, 960 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c64f30cf63c..88f389c6ec48 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2,6 +2,7 @@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -277,7 +278,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@ -8097,6 +8098,964 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From d98588cef04529aa326c6cbc0cfa01a3a3e00ef5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:09 -0800 Subject: selftests/bpf: add tests for stack_zero tracking adjust two tests, since verifier got smarter and add new one to test stack_zero logic Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 66 ++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 88f389c6ec48..eaf294822a8f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5649,7 +5649,7 @@ static struct bpf_test tests[] = { "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -5884,7 +5884,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@ -9056,6 +9056,68 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 48cca7e44f9f8268fdcd4351e2f19ff2275119d1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:10 -0800 Subject: libbpf: add support for bpf_call - recognize relocation emitted by llvm - since all regular function will be kept in .text section and llvm takes care of pc-relative offsets in bpf_call instruction simply copy all of .text to relevant program section while adjusting bpf_call instructions in program section to point to newly copied body of instructions from .text - do so for all programs in the elf file - set all programs types to the one passed to bpf_prog_load() Note for elf files with multiple programs that use different functions in .text section we need to do 'linker' style logic. This work is still TBD Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 6 ++ tools/lib/bpf/bpf.h | 2 +- tools/lib/bpf/libbpf.c | 170 ++++++++++++++++++++++++++++++----------- 3 files changed, 134 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cf446c25c0ec..db1b0923a308 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6534889e2b2f..9f44c196931e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, __u32 map_flags); /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE 65536 +#define BPF_LOG_BUF_SIZE (256 * 1024) int bpf_load_program_name(enum bpf_prog_type type, const char *name, const struct bpf_insn *insns, size_t insns_cnt, const char *license, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 65d0d0aff4fa..5b83875b3594 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -174,12 +174,19 @@ struct bpf_program { char *name; char *section_name; struct bpf_insn *insns; - size_t insns_cnt; + size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct { + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + } type; int insn_idx; - int map_idx; + union { + int map_idx; + int text_off; + }; } *reloc_desc; int nr_reloc; @@ -234,6 +241,7 @@ struct bpf_object { } *reloc; int nr_reloc; int maps_shndx; + int text_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj) size_t pi, si; for (pi = 0; pi < obj->nr_programs; pi++) { - char *name = NULL; + const char *name = NULL; prog = &obj->programs[pi]; + if (prog->idx == obj->efile.text_shndx) { + name = ".text"; + goto skip_search; + } for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { @@ -405,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog->section_name); return -EINVAL; } - +skip_search: prog->name = strdup(name); if (!prog->name) { pr_warning("failed to allocate memory for prog sym %s\n", @@ -795,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if ((sh.sh_type == SHT_PROGBITS) && (sh.sh_flags & SHF_EXECINSTR) && (data->d_size > 0)) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); if (err) { @@ -856,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) } static int -bpf_program__collect_reloc(struct bpf_program *prog, - size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols, - int maps_shndx, struct bpf_map *maps) +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) { + Elf_Data *symbols = obj->efile.symbols; + int text_shndx = obj->efile.text_shndx; + int maps_shndx = obj->efile.maps_shndx; + struct bpf_map *maps = obj->maps; + size_t nr_maps = obj->nr_maps; int i, nrels; pr_debug("collecting relocating info for: '%s'\n", @@ -893,8 +910,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } + pr_debug("relo for %ld value %ld name %d\n", + rel.r_info >> 32, sym.st_value, sym.st_name); - if (sym.st_shndx != maps_shndx) { + if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", prog->section_name, sym.st_shndx); return -LIBBPF_ERRNO__RELOC; @@ -903,6 +922,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, insn_idx = rel.r_offset / sizeof(struct bpf_insn); pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { + if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { + pr_warning("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + prog->reloc_desc[i].type = RELO_CALL; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].text_off = sym.st_value; + continue; + } + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -924,6 +954,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } + prog->reloc_desc[i].type = RELO_LD64; prog->reloc_desc[i].insn_idx = insn_idx; prog->reloc_desc[i].map_idx = map_idx; } @@ -962,28 +993,77 @@ bpf_object__create_maps(struct bpf_object *obj) return 0; } +static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warning("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warning("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = realloc(prog->insns, new_cnt * sizeof(*insn)); + if (!new_insn) { + pr_warning("oom in prog realloc\n"); + return -ENOMEM; + } + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->insns = new_insn; + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += prog->main_prog_cnt - relo->insn_idx; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, prog->section_name); + return 0; +} + static int bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { - int i; + int i, err; if (!prog || !prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { - int insn_idx, map_idx; - struct bpf_insn *insns = prog->insns; + if (prog->reloc_desc[i].type == RELO_LD64) { + struct bpf_insn *insns = prog->insns; + int insn_idx, map_idx; - insn_idx = prog->reloc_desc[i].insn_idx; - map_idx = prog->reloc_desc[i].map_idx; + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = obj->maps[map_idx].fd; + } else { + err = bpf_program__reloc_text(prog, obj, + &prog->reloc_desc[i]); + if (err) + return err; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insns[insn_idx].imm = obj->maps[map_idx].fd; } zfree(&prog->reloc_desc); @@ -1026,7 +1106,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) Elf_Data *data = obj->efile.reloc[i].data; int idx = shdr->sh_info; struct bpf_program *prog; - size_t nr_maps = obj->nr_maps; if (shdr->sh_type != SHT_REL) { pr_warning("internal error at %d\n", __LINE__); @@ -1040,11 +1119,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__RELOC; } - err = bpf_program__collect_reloc(prog, nr_maps, + err = bpf_program__collect_reloc(prog, shdr, data, - obj->efile.symbols, - obj->efile.maps_shndx, - obj->maps); + obj); if (err) return err; } @@ -1197,6 +1274,8 @@ bpf_object__load_progs(struct bpf_object *obj) int err; for (i = 0; i < obj->nr_programs; i++) { + if (obj->programs[i].idx == obj->efile.text_shndx) + continue; err = bpf_program__load(&obj->programs[i], obj->license, obj->kern_version); @@ -1859,7 +1938,7 @@ long libbpf_get_error(const void *ptr) int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { - struct bpf_program *prog; + struct bpf_program *prog, *first_prog = NULL; struct bpf_object *obj; int err; @@ -1867,25 +1946,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, if (IS_ERR(obj)) return -ENOENT; - prog = bpf_program__next(NULL, obj); - if (!prog) { - bpf_object__close(obj); - return -ENOENT; - } - - /* - * If type is not specified, try to guess it based on - * section name. - */ - if (type == BPF_PROG_TYPE_UNSPEC) { - type = bpf_program__guess_type(prog); + bpf_object__for_each_program(prog, obj) { + /* + * If type is not specified, try to guess it based on + * section name. + */ if (type == BPF_PROG_TYPE_UNSPEC) { - bpf_object__close(obj); - return -EINVAL; + type = bpf_program__guess_type(prog); + if (type == BPF_PROG_TYPE_UNSPEC) { + bpf_object__close(obj); + return -EINVAL; + } } + + bpf_program__set_type(prog, type); + if (prog->idx != obj->efile.text_shndx && !first_prog) + first_prog = prog; + } + + if (!first_prog) { + pr_warning("object file doesn't contain bpf program\n"); + bpf_object__close(obj); + return -ENOENT; } - bpf_program__set_type(prog, type); err = bpf_object__load(obj); if (err) { bpf_object__close(obj); @@ -1893,6 +1977,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, } *pobj = obj; - *prog_fd = bpf_program__fd(prog); + *prog_fd = bpf_program__fd(first_prog); return 0; } -- cgit v1.2.3 From 3bc35c63cb70466c78d3972ceaf8205aa463a192 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:11 -0800 Subject: selftests/bpf: add bpf_call test strip always_inline from test_l4lb.c and compile it with -fno-inline to let verifier go through 11 function with various function arguments and return values Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 11 +- tools/testing/selftests/bpf/test_l4lb_noinline.c | 473 +++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.c | 14 +- 3 files changed, 492 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_l4lb_noinline.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 255fb1f50f6b..6970d073df5b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -17,7 +17,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ + test_l4lb_noinline.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py @@ -49,8 +50,12 @@ else CPU ?= generic endif +CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline + %.o: %.c - $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ + $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c new file mode 100644 index 000000000000..ba44a14e6dc4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1d7d2149163a..abff83bf8d40 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -169,10 +169,9 @@ out: #define NUM_ITER 100000 #define VIP_NUM 5 -static void test_l4lb(void) +static void test_l4lb(const char *file) { unsigned int nr_cpus = bpf_num_possible_cpus(); - const char *file = "./test_l4lb.o"; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -249,6 +248,15 @@ out: bpf_object__close(obj); } +static void test_l4lb_all(void) +{ + const char *file1 = "./test_l4lb.o"; + const char *file2 = "./test_l4lb_noinline.o"; + + test_l4lb(file1); + test_l4lb(file2); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -757,7 +765,7 @@ int main(void) test_pkt_access(); test_xdp(); - test_l4lb(); + test_l4lb_all(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); -- cgit v1.2.3 From b0b04fc49e3b97a6039b9b658798efdcda71478d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:12 -0800 Subject: selftests/bpf: add xdp noinline test add large semi-artificial XDP test with 18 functions to stress test bpf call verification logic Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_progs.c | 81 +++ tools/testing/selftests/bpf/test_xdp_noinline.c | 833 ++++++++++++++++++++++++ 3 files changed, 916 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_xdp_noinline.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6970d073df5b..7ef9601d04bf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -18,7 +18,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o + test_l4lb_noinline.o test_xdp_noinline.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py @@ -54,6 +54,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ -Wno-compare-distinct-pointer-types $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline %.o: %.c $(CLANG) $(CLANG_FLAGS) \ diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index abff83bf8d40..6472ca98690e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -257,6 +257,86 @@ static void test_l4lb_all(void) test_l4lb(file2); } +static void test_xdp_noinline(void) +{ + const char *file = "./test_xdp_noinline.o"; + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -766,6 +846,7 @@ int main(void) test_pkt_access(); test_xdp(); test_l4lb_all(); + test_xdp_noinline(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; -- cgit v1.2.3 From 28ab173e96b3971842414bf88eb02eca6ea3f018 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 17:55:17 -0800 Subject: selftests/bpf: additional bpf_call tests Add some additional checks for few more corner cases. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 597 ++++++++++++++++++++++++++++ 1 file changed, 597 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index eaf294822a8f..3bacff0d6f91 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8110,6 +8110,180 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, .result = ACCEPT, }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, { "calls: using r0 returned by callee", .insns = { @@ -8121,6 +8295,17 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, .result = ACCEPT, }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, { "calls: callee is using r1", .insns = { @@ -8223,6 +8408,71 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, { "calls: two calls with bad jump", .insns = { @@ -8297,6 +8547,18 @@ static struct bpf_test tests[] = { .errstr = "invalid destination", .result = REJECT, }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, { "calls: jumping across function bodies. test1", .insns = { @@ -8366,6 +8628,30 @@ static struct bpf_test tests[] = { .errstr = "last insn", .result = REJECT, }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, { "calls: two calls with bad fallthrough", .insns = { @@ -8459,6 +8745,36 @@ static struct bpf_test tests[] = { .errstr = "cannot spill", .result = REJECT, }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, { "calls: two calls with stack write and void return", .insns = { @@ -9056,6 +9372,287 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, { "calls: caller stack init to zero or map_value_or_null", .insns = { -- cgit v1.2.3 From 21567eded9805acbf69807671eb94d3536f797d0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Dec 2017 16:19:30 -0800 Subject: libbpf: fix Makefile exit code if libelf not found /bin/sh's exit does not recognize -1 as a number, leading to the following error message: /bin/sh: 1: exit: Illegal number: -1 Use 1 as the exit code. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4555304dc18e..8ed43ae9db9b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -213,10 +213,10 @@ PHONY += force elfdep bpfdep force: elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. -- cgit v1.2.3 From 4ca998fe46b1fce4988005851df2c85b7bf2addf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Dec 2017 15:11:30 -0800 Subject: selftests/bpf: add netdevsim to config BPF offload tests (test_offload.py) will require netdevsim to be built, add it to config. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9d4897317c77..983dd25d49f4 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,3 +4,4 @@ CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y +CONFIG_NETDEVSIM=m -- cgit v1.2.3 From 7d9890ef505a8c2a778d304535e26e827d58c466 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 19 Dec 2017 15:53:11 -0500 Subject: libbpf: Fix build errors. These elf object pieces are of type Elf64_Xword and therefore could be "long long" on some builds. Cast to "long long" and use printf format %lld to deal with this since we are building with -Werror=format. Signed-off-by: David S. Miller Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5b83875b3594..e9c4b7cabcf2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -910,8 +910,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - pr_debug("relo for %ld value %ld name %d\n", - rel.r_info >> 32, sym.st_value, sym.st_name); + pr_debug("relo for %lld value %lld name %d\n", + (long long) (rel.r_info >> 32), + (long long) sym.st_value, sym.st_name); if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", -- cgit v1.2.3 From 92e70b83c0fef79dea9b8be06944b1eaffa4a9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Thu, 14 Dec 2017 21:26:08 +0100 Subject: tools/gpio: Don't use u_int32_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit u_int32_t is a non-standard version of uint32_t, that was apparently introduced by BSD. Use uint32_t from stdint.h instead. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Linus Walleij --- tools/gpio/gpio-event-mon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c2595158..be6768e21b09 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -27,8 +28,8 @@ int monitor_device(const char *device_name, unsigned int line, - u_int32_t handleflags, - u_int32_t eventflags, + uint32_t handleflags, + uint32_t eventflags, unsigned int loops) { struct gpioevent_request req; @@ -145,8 +146,8 @@ int main(int argc, char **argv) const char *device_name = NULL; unsigned int line = -1; unsigned int loops = 0; - u_int32_t handleflags = GPIOHANDLE_REQUEST_INPUT; - u_int32_t eventflags = 0; + uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT; + uint32_t eventflags = 0; int c; while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { -- cgit v1.2.3 From 5d0c138eff18651549e5501aaca8f3726cc9a6b5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 19 Dec 2017 10:37:02 -0800 Subject: selftests: rtnetlink: add gretap test cases Add test cases for gretap and ip6gretap, native mode and external (collect metadata) mode. Signed-off-by: William Tu Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 98 ++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5215493166c9..dada4ab69142 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -502,6 +502,102 @@ kci_test_macsec() echo "PASS: macsec" } +kci_test_gretap() +{ + testns="testns" + DEV_NS=gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: gretap" + return 1 + fi + echo "PASS: gretap" + + ip netns del "$testns" +} + +kci_test_ip6gretap() +{ + testns="testns" + DEV_NS=ip6gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help ip6gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \ + key 102 local fc00:100::1 remote fc00:100::2 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6gretap" + return 1 + fi + echo "PASS: ip6gretap" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -514,6 +610,8 @@ kci_test_rtnl() kci_test_route_get kci_test_tc kci_test_gre + kci_test_gretap + kci_test_ip6gretap kci_test_bridge kci_test_addrlabel kci_test_ifalias -- cgit v1.2.3 From 7105e828c087de970fcb5a9509db51bfe6bd7894 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Dec 2017 13:42:57 +0100 Subject: bpf: allow for correlation of maps and helpers in dump Currently a dump of an xlated prog (post verifier stage) doesn't correlate used helpers as well as maps. The prog info lists involved map ids, however there's no correlation of where in the program they are used as of today. Likewise, bpftool does not correlate helper calls with the target functions. The latter can be done w/o any kernel changes through kallsyms, and also has the advantage that this works with inlined helpers and BPF calls. Example, via interpreter: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 1 tag c74773051b364165 <-- prog id:1 * Output before patch (calls/maps remain unclear): # bpftool prog dump xlated id 1 <-- dump prog id:1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = 0xffff95c47a8d4800 6: (85) call unknown#73040 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call unknown#73040 12: (15) if r0 == 0x0 goto pc+23 [...] * Output after patch: # bpftool prog dump xlated id 1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call bpf_map_lookup_elem#73424 <-- helper call 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call bpf_map_lookup_elem#73424 12: (15) if r0 == 0x0 goto pc+23 [...] # bpftool map show id 2 <-- show/dump/etc map id:2 2: hash_of_maps flags 0x0 key 4B value 4B max_entries 3 memlock 4096B Example, JITed, same prog: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 3 tag c74773051b364165 jited # bpftool prog show id 3 3: sched_cls tag c74773051b364165 loaded_at Dec 19/13:48 uid 0 xlated 384B jited 257B memlock 4096B map_ids 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call __htab_map_lookup_elem#77408 <-+ inlined rewrite 7: (15) if r0 == 0x0 goto pc+2 | 8: (07) r0 += 56 | 9: (79) r0 = *(u64 *)(r0 +0) <-+ 10: (15) if r0 == 0x0 goto pc+24 11: (bf) r2 = r10 12: (07) r2 += -4 [...] Example, same prog, but kallsyms disabled (in that case we are also not allowed to pass any relative offsets, etc, so prog becomes pointer sanitized on dump): # sysctl kernel.kptr_restrict=2 kernel.kptr_restrict = 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] 6: (85) call bpf_unspec#0 7: (15) if r0 == 0x0 goto pc+2 [...] Example, BPF calls via interpreter: # bpftool prog dump xlated id 1 0: (85) call pc+2#__bpf_prog_run_args32 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit Example, BPF calls via JIT: # sysctl net.core.bpf_jit_enable=1 net.core.bpf_jit_enable = 1 # sysctl net.core.bpf_jit_kallsyms=1 net.core.bpf_jit_kallsyms = 1 # bpftool prog dump xlated id 1 0: (85) call pc+2#bpf_prog_3b185187f1855c4c_F 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit And finally, an example for tail calls that is now working as well wrt correlation: # bpftool prog dump xlated id 2 [...] 10: (b7) r2 = 8 11: (85) call bpf_trace_printk#-41312 12: (bf) r1 = r6 13: (18) r2 = map[id:1] 15: (b7) r3 = 0 16: (85) call bpf_tail_call#12 17: (b7) r1 = 42 18: (6b) *(u16 *)(r6 +46) = r1 19: (b7) r0 = 0 20: (95) exit # bpftool map show id 1 1: prog_array flags 0x0 key 4B value 4B max_entries 1 memlock 4096B Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 181 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 172 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 037484ceaeaf..42ee8892549c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -401,6 +401,88 @@ static int do_show(int argc, char **argv) return err; } +#define SYM_MAX_NAME 256 + +struct kernel_sym { + unsigned long address; + char name[SYM_MAX_NAME]; +}; + +struct dump_data { + unsigned long address_call_base; + struct kernel_sym *sym_mapping; + __u32 sym_count; + char scratch_buff[SYM_MAX_NAME]; +}; + +static int kernel_syms_cmp(const void *sym_a, const void *sym_b) +{ + return ((struct kernel_sym *)sym_a)->address - + ((struct kernel_sym *)sym_b)->address; +} + +static void kernel_syms_load(struct dump_data *dd) +{ + struct kernel_sym *sym; + char buff[256]; + void *tmp, *address; + FILE *fp; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) + return; + + while (!feof(fp)) { + if (!fgets(buff, sizeof(buff), fp)) + break; + tmp = realloc(dd->sym_mapping, + (dd->sym_count + 1) * + sizeof(*dd->sym_mapping)); + if (!tmp) { +out: + free(dd->sym_mapping); + dd->sym_mapping = NULL; + fclose(fp); + return; + } + dd->sym_mapping = tmp; + sym = &dd->sym_mapping[dd->sym_count]; + if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + continue; + sym->address = (unsigned long)address; + if (!strcmp(sym->name, "__bpf_call_base")) { + dd->address_call_base = sym->address; + /* sysctl kernel.kptr_restrict was set */ + if (!sym->address) + goto out; + } + if (sym->address) + dd->sym_count++; + } + + fclose(fp); + + qsort(dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp); +} + +static void kernel_syms_destroy(struct dump_data *dd) +{ + free(dd->sym_mapping); +} + +static struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) +{ + struct kernel_sym sym = { + .address = key, + }; + + return dd->sym_mapping ? + bsearch(&sym, dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; +} + static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) { va_list args; @@ -410,8 +492,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) +static const char *print_call_pcrel(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address, + const struct bpf_insn *insn) { + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#%s", insn->off, sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#0x%lx", insn->off, address); + return dd->scratch_buff; +} + +static const char *print_call_helper(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%s", sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%lx", address); + return dd->scratch_buff; +} + +static const char *print_call(void *private_data, + const struct bpf_insn *insn) +{ + struct dump_data *dd = private_data; + unsigned long address = dd->address_call_base + insn->imm; + struct kernel_sym *sym; + + sym = kernel_syms_search(dd, address); + if (insn->src_reg == BPF_PSEUDO_CALL) + return print_call_pcrel(dd, sym, address, insn); + else + return print_call_helper(dd, sym, address); +} + +static const char *print_imm(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm) +{ + struct dump_data *dd = private_data; + + if (insn->src_reg == BPF_PSEUDO_MAP_FD) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u]", insn->imm); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%llx", (unsigned long long)full_imm); + return dd->scratch_buff; +} + +static void dump_xlated_plain(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -425,7 +570,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); - print_bpf_insn(print_insn, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { printf(" "); @@ -454,8 +599,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_json(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn_json, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -470,7 +622,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) jsonw_start_object(json_wtr); jsonw_name(json_wtr, "disasm"); - print_bpf_insn(print_insn_json, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { jsonw_name(json_wtr, "opcodes"); @@ -505,6 +657,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; + struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; char *filepath = NULL; @@ -592,6 +745,14 @@ static int do_dump(int argc, char **argv) goto err_free; } + if ((member_len == &info.jited_prog_len && + info.jited_prog_insns == 0) || + (member_len == &info.xlated_prog_len && + info.xlated_prog_insns == 0)) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -608,17 +769,19 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - if (member_len == &info.jited_prog_len) + if (member_len == &info.jited_prog_len) { disasm_print_insn(buf, *member_len, opcodes); - else + } else { + kernel_syms_load(&dd); if (json_output) - dump_xlated_json(buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes); else - dump_xlated_plain(buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes); + kernel_syms_destroy(&dd); + } } free(buf); - return 0; err_free: -- cgit v1.2.3 From c475ffad58a8a2f1d3a2bd433eaa491471981b49 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 20 Dec 2017 10:37:08 -0800 Subject: tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_dev_cgroup The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases, e.g. in a test machine mimicking our production system, this test may fail due to unable to charge the required memory for prog load: $ ./test_dev_cgroup libbpf: load bpf program failed: Operation not permitted libbpf: failed to load program 'cgroup/dev' libbpf: failed to load object './dev_cgroup.o' Failed to load DEV_CGROUP program ... Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes the test pass. This patch also fixed a problem where when bpf_prog_load fails, cleanup_cgroup_environment() should not be called since setup_cgroup_environment() has not been invoked. Otherwise, the following confusing message will appear: ... (/home/yhs/local/linux/tools/testing/selftests/bpf/cgroup_helpers.c:95: errno: No such file or directory) Opening Cgroup Procs: /mnt/cgroup.procs ... Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_dev_cgroup.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 02c85d6c89b0..c1535b34f14f 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -23,15 +25,19 @@ int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; struct bpf_object *obj; int error = EXIT_FAILURE; int prog_fd, cgroup_fd; __u32 prog_cnt; + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); - goto err; + goto out; } if (setup_cgroup_environment()) { @@ -89,5 +95,6 @@ int main(int argc, char **argv) err: cleanup_cgroup_environment(); +out: return error; } -- cgit v1.2.3 From 1696784eb7b52b13b62d160c028ef2c2c981d4f2 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 21 Dec 2017 11:11:31 +1030 Subject: tools/gpio: Fix build error with musl libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GPIO tools build fails when using a buildroot toolchain that uses musl as it's C library: arm-broomstick-linux-musleabi-gcc -Wp,-MD,./.gpio-event-mon.o.d \ -Wp,-MT,gpio-event-mon.o -O2 -Wall -g -D_GNU_SOURCE \ -Iinclude -D"BUILD_STR(s)=#s" -c -o gpio-event-mon.o gpio-event-mon.c gpio-event-mon.c:30:6: error: unknown type name ‘u_int32_t’; did you mean ‘uint32_t’? u_int32_t handleflags, ^~~~~~~~~ uint32_t The glibc headers installed on my laptop include sys/types.h in unistd.h, but it appears that musl does not. Fixes: 97f69747d8b1 ("tools/gpio: add the gpio-event-mon tool") Cc: stable@vger.kernel.org Signed-off-by: Joel Stanley Signed-off-by: Linus Walleij --- tools/gpio/gpio-event-mon.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index be6768e21b09..dac4d4131d9b 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -24,6 +24,7 @@ #include #include #include +#include #include int monitor_device(const char *device_name, -- cgit v1.2.3 From fd05e57bb35ad5eb7e261b64e5cf46445250f842 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Sat, 23 Dec 2017 10:09:55 +0000 Subject: bpf: fix stacksafe exploration when comparing states Commit cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack") introduced a very relaxed check when comparing stacks of different states, effectively returning a positive result in many cases where it shouldn't. This can create problems in cases such as this following C pseudocode: long var; long *x = bpf_map_lookup(...); if (!x) return; if (*x != 0xbeef) var = 0; else var = 1; /* This is the key part, calling a helper causes an explored state * to be saved with the information that "var" is on the stack as * STACK_ZERO, since the helper is first met by the verifier after * the "var = 0" assignment. This state will however be wrongly used * also for the "var = 1" case, so the verifier assumes "var" is always * 0 and will replace the NULL assignment with nops, because the * search pruning prevents it from exploring the faulty branch. */ bpf_ktime_get_ns(); if (var) *(long *)0 = 0xbeef; Fix the issue by making sure that the stack is fully explored before returning a positive comparison result. Also attach a couple tests that highlight the bad behavior. In the first test, without this fix instructions 16 and 17 are replaced with nops instead of being rejected by the verifier. The second test, instead, allows a program to make a potentially illegal read from the stack. Fixes: cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack") Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3bacff0d6f91..5e79515d10c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -9715,6 +9715,57 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 6b80ad299208b44ba33cb6df80bdaa3f63cf03e2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 22 Dec 2017 19:12:35 +0100 Subject: bpf: selftest for late caller stack size increase This checks that it is not possible to bypass the total stack size check in update_stack_depth() by calling a function that uses a large amount of stack memory *before* using a large amount of stack memory in the caller. Currently, the first added testcase causes a rejection as expected, but the second testcase is (AFAICS incorrectly) accepted: [...] #483/p calls: stack overflow using two frames (post-call access) FAIL Unexpected success to load! 0: (85) call pc+2 caller: R10=fp0,call_-1 callee: frame1: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0 3: (72) *(u8 *)(r10 -300) = 0 4: (b7) r0 = 0 5: (95) exit returning from callee: frame1: R0_w=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_0 to caller at 1: R0_w=inv0 R10=fp0,call_-1 from 5 to 1: R0=inv0 R10=fp0,call_-1 1: (72) *(u8 *)(r10 -300) = 0 2: (95) exit processed 6 insns, stack depth 300+300 [...] Summary: 704 PASSED, 1 FAILED AFAICS the JIT-generated code for the second testcase shows that this really causes the stack pointer to be decremented by 300+300: first function: 00000000 55 push rbp 00000001 4889E5 mov rbp,rsp 00000004 4881EC58010000 sub rsp,0x158 0000000B 4883ED28 sub rbp,byte +0x28 [...] 00000025 E89AB3AFE5 call 0xffffffffe5afb3c4 0000002A C685D4FEFFFF00 mov byte [rbp-0x12c],0x0 [...] 00000041 4883C528 add rbp,byte +0x28 00000045 C9 leave 00000046 C3 ret second function: 00000000 55 push rbp 00000001 4889E5 mov rbp,rsp 00000004 4881EC58010000 sub rsp,0x158 0000000B 4883ED28 sub rbp,byte +0x28 [...] 00000025 C685D4FEFFFF00 mov byte [rbp-0x12c],0x0 [...] 0000003E 4883C528 add rbp,byte +0x28 00000042 C9 leave 00000043 C3 ret Signed-off-by: Jann Horn Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 34 +++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5e79515d10c5..41dcc7dbba42 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8729,6 +8729,40 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, .result = ACCEPT, }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, { "calls: spill into caller stack frame", .insns = { -- cgit v1.2.3 From 6b86c4217c231cbd268bd8c6fda025b27047d3ed Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:41 -0800 Subject: selftests/bpf: additional stack depth tests to test inner logic of stack depth tracking Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 121 ++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 41dcc7dbba42..b5a7a6c530dc 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8763,6 +8763,127 @@ static struct bpf_test tests[] = { .errstr = "combined stack size", .result = REJECT, }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, { "calls: spill into caller stack frame", .insns = { -- cgit v1.2.3 From aada9ce644e53410954daa6beb1f7c4ca158abd7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:42 -0800 Subject: bpf: fix max call depth check fix off by one error in max call depth check and add a test Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b5a7a6c530dc..5d0a574ce270 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8884,6 +8884,41 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "combined stack", }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, { "calls: spill into caller stack frame", .insns = { -- cgit v1.2.3 From 4bfe3bd3cc351efd1d51b3258b060e9445533888 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 27 Dec 2017 19:16:28 +0000 Subject: tools/bpftool: use version from the kernel source tree Bpftool determines it's own version based on the kernel version, which is picked from the linux/version.h header. It's strange to use the version of the installed kernel headers, and makes much more sense to use the version of the actual source tree, where bpftool sources are. Fix this by building kernelversion target and use the resulting string as bpftool version. Example: before: $ bpftool version bpftool v4.14.6 after: $ bpftool version bpftool v4.15.0-rc3 $bpftool version --json {"version":"4.15.0-rc3"} Signed-off-by: Roman Gushchin Reviewed-by: Jakub Kicinski Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 3 +++ tools/bpf/bpftool/main.c | 13 ++----------- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 3f17ad317512..f8f31a8d9269 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -23,6 +23,8 @@ endif LIBBPF = $(BPF_PATH)libbpf.a +BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) + $(LIBBPF): FORCE $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) @@ -38,6 +40,7 @@ CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' LIBS = -lelf -lbfd -lopcodes $(LIBBPF) INSTALL ?= install diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index ecd53ccf1239..3a0396d87c42 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -95,21 +94,13 @@ static int do_help(int argc, char **argv) static int do_version(int argc, char **argv) { - unsigned int version[3]; - - version[0] = LINUX_VERSION_CODE >> 16; - version[1] = LINUX_VERSION_CODE >> 8 & 0xf; - version[2] = LINUX_VERSION_CODE & 0xf; - if (json_output) { jsonw_start_object(json_wtr); jsonw_name(json_wtr, "version"); - jsonw_printf(json_wtr, "\"%u.%u.%u\"", - version[0], version[1], version[2]); + jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); jsonw_end_object(json_wtr); } else { - printf("%s v%u.%u.%u\n", bin_name, - version[0], version[1], version[2]); + printf("%s v%s\n", bin_name, BPFTOOL_VERSION); } return 0; } -- cgit v1.2.3 From fb982666e380c1632a74495b68b3c33a66e76430 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 27 Dec 2017 19:16:29 +0000 Subject: tools/bpftool: fix bpftool build with bintutils >= 2.9 Bpftool build is broken with binutils version 2.29 and later. The cause is commit 003ca0fd2286 ("Refactor disassembler selection") in the binutils repo, which changed the disassembler() function signature. Fix this by adding a new "feature" to the tools/build/features infrastructure and make it responsible for decision which disassembler() function signature to use. Signed-off-by: Roman Gushchin Cc: Jakub Kicinski Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/Makefile | 29 +++++++++++++++++++++++ tools/bpf/bpf_jit_disasm.c | 7 ++++++ tools/bpf/bpftool/Makefile | 24 +++++++++++++++++++ tools/bpf/bpftool/jit_disasm.c | 7 ++++++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-disassembler-four-args.c | 15 ++++++++++++ 6 files changed, 86 insertions(+) create mode 100644 tools/build/feature/test-disassembler-four-args.c (limited to 'tools') diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 07a6697466ef..c8ec0ae16bf0 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -9,6 +9,35 @@ MAKE = make CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +FEATURE_USER = .bpf +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean bpftool_clean +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + %.yacc.c: %.y $(YACC) -o $@ -d $< diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 75bf526a0168..30044bc4f389 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); do { diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f8f31a8d9269..2237bc43f71c 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -46,6 +46,30 @@ LIBS = -lelf -lbfd -lopcodes $(LIBBPF) INSTALL ?= install RM ?= rm -f +FEATURE_USER = .bpftool +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + include $(wildcard *.d) all: $(OUTPUT)bpftool diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 1551d3918d4c..57d32e8a1391 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -107,7 +107,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); if (json_output) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 96982640fbf8..17f2c73fff8b 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -13,6 +13,7 @@ FILES= \ test-hello.bin \ test-libaudit.bin \ test-libbfd.bin \ + test-disassembler-four-args.bin \ test-liberty.bin \ test-liberty-z.bin \ test-cplus-demangle.bin \ @@ -188,6 +189,9 @@ $(OUTPUT)test-libpython-version.bin: $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl +$(OUTPUT)test-disassembler-four-args.bin: + $(BUILD) -lbfd -lopcodes + $(OUTPUT)test-liberty.bin: $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c new file mode 100644 index 000000000000..45ce65cfddf0 --- /dev/null +++ b/tools/build/feature/test-disassembler-four-args.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int main(void) +{ + bfd *abfd = bfd_openr(NULL, NULL); + + disassembler(bfd_get_arch(abfd), + bfd_big_endian(abfd), + bfd_get_mach(abfd), + abfd); + + return 0; +} -- cgit v1.2.3 From 675fc275a3a2d905535207237402c6d8dcb5fa4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:09 -0800 Subject: bpf: offload: report device information for offloaded programs Report to the user ifindex and namespace information of offloaded programs. If device has disappeared return -ENODEV. Specify the namespace using dev/inode combination. CC: Eric W. Biederman Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index db1b0923a308..4e8c60acfa32 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -921,6 +921,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 522622104ebabbc3372d2fad706b4d30cee13319 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:10 -0800 Subject: tools: bpftool: report device information for offloaded programs Print the just-exposed device information about device to which program is bound. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 2 ++ tools/bpf/bpftool/prog.c | 3 +++ 3 files changed, 57 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b62c94e3997a..6601c95a9258 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -44,7 +44,9 @@ #include #include #include +#include #include +#include #include #include @@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) free(obj); } } + +static char * +ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) +{ + struct stat st; + int err; + + err = stat("/proc/self/ns/net", &st); + if (err) { + p_err("Can't stat /proc/self: %s", strerror(errno)); + return NULL; + } + + if (st.st_dev != ns_dev || st.st_ino != ns_ino) + return NULL; + + return if_indextoname(ifindex, buf); +} + +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + printf(" dev "); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + printf("%s", name); + else + printf("ifindex %u ns_dev %llu ns_ino %llu", + ifindex, ns_dev, ns_inode); +} + +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + jsonw_name(json_wtr, "dev"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_uint_field(json_wtr, "ns_dev", ns_dev); + jsonw_uint_field(json_wtr, "ns_inode", ns_inode); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_end_object(json_wtr); +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8f6d3cac0347..65b526fe6e7e 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -96,6 +96,8 @@ struct pinned_obj { int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); struct cmd { const char *cmd; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index fd0873178503..98f871ed53d6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + if (info->load_time) { char buf[32]; @@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); if (info->load_time) { -- cgit v1.2.3 From 752d7b4501c250bead233ab041738db84436b1af Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:11 -0800 Subject: selftests/bpf: test device info reporting for bound progs Check if bound programs report correct device info. Test in local namespace, in remote one, back to the local ns, remove the device and check that information is cleared. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 112 +++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index c940505c2978..e3c750f17cb8 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -18,6 +18,8 @@ import argparse import json import os import pprint +import random +import string import subprocess import time @@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) pp = pprint.PrettyPrinter() devs = [] # devices we created for clean up files = [] # files to be removed +netns = [] # net namespaces to be removed def log_get_sec(level=0): return "*" * (log_level + level) @@ -128,22 +131,25 @@ def rm(f): if f in files: files.remove(f) -def tool(name, args, flags, JSON=True, fail=True): +def tool(name, args, flags, JSON=True, ns="", fail=True): params = "" if JSON: params += "%s " % (flags["json"]) - ret, out = cmd(name + " " + params + args, fail=fail) + if ns != "": + ns = "ip netns exec %s " % (ns) + + ret, out = cmd(ns + name + " " + params + args, fail=fail) if JSON and len(out.strip()) != 0: return ret, json.loads(out) else: return ret, out -def bpftool(args, JSON=True, fail=True): - return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail) +def bpftool(args, JSON=True, ns="", fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) -def bpftool_prog_list(expected=None): - _, progs = bpftool("prog show", JSON=True, fail=True) +def bpftool_prog_list(expected=None, ns=""): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -158,13 +164,13 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) -def ip(args, force=False, JSON=True, fail=True): +def ip(args, force=False, JSON=True, ns="", fail=True): if force: args = "-force " + args - return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail) + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail) -def tc(args, JSON=True, fail=True): - return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail) +def tc(args, JSON=True, ns="", fail=True): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) def ethtool(dev, opt, args, fail=True): return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) @@ -178,6 +184,15 @@ def bpf_pinned(name): def bpf_bytecode(bytecode): return "bytecode \"%s\"" % (bytecode) +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + class DebugfsDir: """ Class for accessing DebugFS directories as a dictionary. @@ -237,6 +252,8 @@ class NetdevSim: self.dev = self._netdevsim_create() devs.append(self) + self.ns = "" + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) self.dfs_refresh() @@ -257,7 +274,7 @@ class NetdevSim: def remove(self): devs.remove(self) - ip("link del dev %s" % (self.dev["ifname"])) + ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) def dfs_refresh(self): self.dfs = DebugfsDir(self.dfs_dir) @@ -285,6 +302,11 @@ class NetdevSim: time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + def set_ns(self, ns): + name = "1" if ns == "" else ns + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + def set_mtu(self, mtu, fail=True): return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) @@ -372,6 +394,8 @@ def clean_up(): dev.remove() for f in files: cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) def pin_prog(file_name, idx=0): progs = bpftool_prog_list(expected=(idx + 1)) @@ -381,6 +405,35 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) +def check_dev_info(other_ns, ns, pin_file=None, removed=False): + if removed: + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + return + progs = bpftool_prog_list(expected=int(not removed), ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not removed and not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + if removed: + fail(dev["ifindex"] != 0, "Device perameters not zero on removed") + fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") + fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") + # Parse command line parser = argparse.ArgumentParser() parser.add_argument("--log", help="output verbose log to given file") @@ -417,6 +470,12 @@ for s in samples: skip(ret != 0, "sample %s/%s not found, please compile it" % (bpf_test_dir, s)) +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + try: obj = bpf_obj("sample_ret0.o") bytecode = bpf_bytecode("1,6 0 0 4294967295,") @@ -549,6 +608,8 @@ try: progs = bpftool_prog_list(expected=1) fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) @@ -673,6 +734,35 @@ try: fail(time_diff < delay_sec, "Removal process took %s, expected %s" % (time_diff, delay_sec)) + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + sim.set_xdp(obj, "offload") + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + pin_file, _ = pin_prog("/sys/fs/bpf/tmp") + sim.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info(True, "", pin_file=pin_file, removed=True) + print("%s: OK" % (os.path.basename(__file__))) finally: -- cgit v1.2.3 From d36f45e5b46723cf2d4147173e18c52d4143176d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 25 Dec 2017 14:43:04 -0800 Subject: selftests/net: fix bugs in address and port initialization Address/port initialization should work correctly regardless of the order in which command line arguments are supplied, E.g, cfg_port should be used to connect to the remote host even if it is processed after -D, src/dst address initialization should not require that [-4|-6] be specified before the -S or -D args, receiver should be able to bind to *. Achieve this by making sure that the address/port structures are initialized after all command line options are parsed. Store cfg_port in host-byte order, and use htons() to set up the sin_port/sin6_port before bind/connect, so that the network system calls get the correct values in network-byte order. Signed-off-by: Sowmini Varadhan Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/msg_zerocopy.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 3ab6ec403905..e11fe84de0fd 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -259,22 +259,28 @@ static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) return sizeof(*ip6h); } -static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: + memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); - if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); - if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: @@ -603,6 +609,7 @@ static void parse_opts(int argc, char **argv) sizeof(struct tcphdr) - 40 /* max tcp options */; int c; + char *daddr = NULL, *saddr = NULL; cfg_payload_len = max_payload_len; @@ -627,7 +634,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + daddr = optarg; break; case 'i': cfg_ifindex = if_nametoindex(optarg); @@ -638,7 +645,7 @@ static void parse_opts(int argc, char **argv) cfg_cork_mixed = true; break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); break; case 'r': cfg_rx = true; @@ -647,7 +654,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': - setup_sockaddr(cfg_family, optarg, &cfg_src_addr); + saddr = optarg; break; case 't': cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; @@ -660,6 +667,8 @@ static void parse_opts(int argc, char **argv) break; } } + setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); + setup_sockaddr(cfg_family, saddr, &cfg_src_addr); if (cfg_payload_len > max_payload_len) error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); -- cgit v1.2.3 From ef27e2ccde77c92245eccec97be32b5996a80efc Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 26 Dec 2017 11:10:07 -0800 Subject: selftests: rtnetlink: add erspan and ip6erspan Add test cases for ipv4, ipv6 erspan, v1 and v2 native mode and external (collect metadata) mode. Signed-off-by: William Tu Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 131 +++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index dada4ab69142..a622eeecc3a6 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -598,6 +598,135 @@ kci_test_ip6gretap() ip netns del "$testns" } +kci_test_erspan() +{ + testns="testns" + DEV_NS=erspan00 + ret=0 + + ip link help erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: erspan" + return 1 + fi + echo "PASS: erspan" + + ip netns del "$testns" +} + +kci_test_ip6erspan() +{ + testns="testns" + DEV_NS=ip6erspan00 + ret=0 + + ip link help ip6erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel ip6erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel ip6erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" \ + type ip6erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6erspan" + return 1 + fi + echo "PASS: ip6erspan" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -612,6 +741,8 @@ kci_test_rtnl() kci_test_gre kci_test_gretap kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan kci_test_bridge kci_test_addrlabel kci_test_ifalias -- cgit v1.2.3 From 6ed361586b323e576fd5536078fe9f2ee7906e61 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Jan 2018 21:37:36 +1100 Subject: selftests/powerpc: Add a test of SEGV error behaviour Add a test case of the error code reported when we take a SEGV on a mapped but inaccessible area. We broke this recently. Based on a test case from John Sperbeck . Acked-by: John Sperbeck Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/.gitignore | 3 +- tools/testing/selftests/powerpc/mm/Makefile | 2 +- tools/testing/selftests/powerpc/mm/segv_errors.c | 78 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/mm/segv_errors.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index e715a3f2fbf4..7d7c42ed6de9 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,4 +1,5 @@ hugetlb_vs_thp_test subpage_prot tempfile -prot_sao \ No newline at end of file +prot_sao +segv_errors \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index bf315bcbe663..8ebbe96d80a8 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors TEST_GEN_FILES := tempfile include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c new file mode 100644 index 000000000000..06ae76ee3ea1 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/segv_errors.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2017 John Sperbeck + * + * Test that an access to a mapped but inaccessible area causes a SEGV and + * reports si_code == SEGV_ACCERR. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +static bool faulted; +static int si_code; + +static void segv_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + si_code = info->si_code; + regs->nip += 4; +} + +int test_segv_errors(void) +{ + struct sigaction act = { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO, + }; + char c, *p = NULL; + + p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + FAIL_IF(p == MAP_FAILED); + + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + faulted = false; + si_code = 0; + + /* + * We just need a compiler barrier, but mb() works and has the nice + * property of being easy to spot in the disassembly. + */ + mb(); + c = *p; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + faulted = false; + si_code = 0; + + mb(); + *p = c; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + return 0; +} + +int main(void) +{ + return test_harness(test_segv_errors, "segv_errors"); +} -- cgit v1.2.3 From 65b875bcc816335be41c336a595adbc10bd885cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jan 2018 14:48:35 -0800 Subject: tools: bpftool: rename cgroup list -> show in the code So far we have used "show" as a keyword for listing programs and maps. Use the word "show" in the code for cgroups too, next commit will alias show and list. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/cgroup.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 34ca303d72bc..24091d87bee3 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -41,7 +41,7 @@ static enum bpf_attach_type parse_attach_type(const char *str) return __MAX_BPF_ATTACH_TYPE; } -static int list_bpf_prog(int id, const char *attach_type_str, +static int show_bpf_prog(int id, const char *attach_type_str, const char *attach_flags_str) { struct bpf_prog_info info = {}; @@ -77,7 +77,7 @@ static int list_bpf_prog(int id, const char *attach_type_str, return 0; } -static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) { __u32 prog_ids[1024] = {0}; char *attach_flags_str; @@ -111,23 +111,23 @@ static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) } for (iter = 0; iter < prog_cnt; iter++) - list_bpf_prog(prog_ids[iter], attach_type_strings[type], + show_bpf_prog(prog_ids[iter], attach_type_strings[type], attach_flags_str); return 0; } -static int do_list(int argc, char **argv) +static int do_show(int argc, char **argv) { enum bpf_attach_type type; int cgroup_fd; int ret = -1; if (argc < 1) { - p_err("too few parameters for cgroup list\n"); + p_err("too few parameters for cgroup show\n"); goto exit; } else if (argc > 1) { - p_err("too many parameters for cgroup list\n"); + p_err("too many parameters for cgroup show\n"); goto exit; } @@ -147,10 +147,10 @@ static int do_list(int argc, char **argv) /* * Not all attach types may be supported, so it's expected, * that some requests will fail. - * If we were able to get the list for at least one + * If we were able to get the show for at least one * attach type, let's return 0. */ - if (list_attached_bpf_progs(cgroup_fd, type) == 0) + if (show_attached_bpf_progs(cgroup_fd, type) == 0) ret = 0; } @@ -294,7 +294,7 @@ static int do_help(int argc, char **argv) } static const struct cmd cmds[] = { - { "list", do_list }, + { "list", do_show }, { "attach", do_attach }, { "detach", do_detach }, { "help", do_help }, -- cgit v1.2.3 From 6ebe6dbd6886af07b102aca42e44edbee94a22d9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jan 2018 14:48:36 -0800 Subject: tools: bpftool: alias show and list commands iproute2 seems to accept show and list as aliases. Let's do the same thing, and by allowing both bring cgroup syntax back in line with maps and progs. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 6 +++--- tools/bpf/bpftool/Documentation/bpftool-map.rst | 6 +++--- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 6 +++--- tools/bpf/bpftool/Documentation/bpftool.rst | 6 +++--- tools/bpf/bpftool/bash-completion/bpftool | 8 ++++---- tools/bpf/bpftool/cgroup.c | 3 ++- tools/bpf/bpftool/map.c | 3 ++- tools/bpf/bpftool/prog.c | 3 ++- 8 files changed, 22 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 45c71b1f682b..2fe2a1bdbe3e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -15,12 +15,12 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **list** | **attach** | **detach** | **help** } + { **show** | **list** | **attach** | **detach** | **help** } MAP COMMANDS ============= -| **bpftool** **cgroup list** *CGROUP* +| **bpftool** **cgroup { show | list }** *CGROUP* | **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] | **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* | **bpftool** **cgroup help** @@ -31,7 +31,7 @@ MAP COMMANDS DESCRIPTION =========== - **bpftool cgroup list** *CGROUP* + **bpftool cgroup { show | list }** *CGROUP* List all programs attached to the cgroup *CGROUP*. Output will start with program ID followed by attach type, diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 421cabc417e6..0ab32b312aec 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -15,13 +15,13 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } MAP COMMANDS ============= -| **bpftool** **map show** [*MAP*] +| **bpftool** **map { show | list }** [*MAP*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* **key** *BYTES* @@ -36,7 +36,7 @@ MAP COMMANDS DESCRIPTION =========== - **bpftool map show** [*MAP*] + **bpftool map { show | list }** [*MAP*] Show information about loaded maps. If *MAP* is specified show information only about given map, otherwise list all maps currently loaded on the system. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 81c97c0e9b67..e4ceee7f2dff 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,12 +15,12 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } MAP COMMANDS ============= -| **bpftool** **prog show** [*PROG*] +| **bpftool** **prog { show | list }** [*PROG*] | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* @@ -31,7 +31,7 @@ MAP COMMANDS DESCRIPTION =========== - **bpftool prog show** [*PROG*] + **bpftool prog { show | list }** [*PROG*] Show information about loaded programs. If *PROG* is specified show information only about given program, otherwise list all programs currently loaded on the system. diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6732a5a617e4..20689a321ffe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -22,13 +22,13 @@ SYNOPSIS | { **-j** | **--json** } [{ **-p** | **--pretty** }] } *MAP-COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } - *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | **load** | **help** } - *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** } + *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7febee05c8e7..0137866bb8f6 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -197,7 +197,7 @@ _bpftool() local PROG_TYPE='id pinned tag' case $command in - show) + show|list) [[ $prev != "$command" ]] && return 0 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 @@ -232,7 +232,7 @@ _bpftool() ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin show' -- \ + COMPREPLY=( $( compgen -W 'dump help pin show list' -- \ "$cur" ) ) ;; esac @@ -240,7 +240,7 @@ _bpftool() map) local MAP_TYPE='id pinned' case $command in - show|dump) + show|list|dump) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) @@ -343,7 +343,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin show update' -- "$cur" ) ) + lookup pin show list update' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 24091d87bee3..35f5f003df28 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -277,7 +277,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s list CGROUP\n" + "Usage: %s %s { show | list } CGROUP\n" " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" " %s %s detach CGROUP ATTACH_TYPE PROG\n" " %s %s help\n" @@ -294,6 +294,7 @@ static int do_help(int argc, char **argv) } static const struct cmd cmds[] = { + { "show", do_show }, { "list", do_show }, { "attach", do_attach }, { "detach", do_detach }, diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a8c3a33dd185..8d7db9d6b9cd 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -861,7 +861,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [MAP]\n" + "Usage: %s %s { show | list } [MAP]\n" " %s %s dump MAP\n" " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" " %s %s lookup MAP key BYTES\n" @@ -885,6 +885,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "update", do_update }, diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 98f871ed53d6..5577960bffe4 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -836,7 +836,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [PROG]\n" + "Usage: %s %s { show | list } [PROG]\n" " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" @@ -854,6 +854,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, -- cgit v1.2.3 From b4fac96d9a672def4e24c1858adb7e9351471815 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jan 2018 14:48:37 -0800 Subject: tools: bpftool: remove new lines from errors It's a little bit unusual for kernel style, but we add the new line character to error strings inside the p_err() function. We do this because new lines at the end of error strings will break JSON output. Fix a few p_err("..\n") which snuck in recently. Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/cgroup.c | 18 +++++++++--------- tools/bpf/bpftool/prog.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 35f5f003df28..cae32a61cb18 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -124,16 +124,16 @@ static int do_show(int argc, char **argv) int ret = -1; if (argc < 1) { - p_err("too few parameters for cgroup show\n"); + p_err("too few parameters for cgroup show"); goto exit; } else if (argc > 1) { - p_err("too many parameters for cgroup show\n"); + p_err("too many parameters for cgroup show"); goto exit; } cgroup_fd = open(argv[0], O_RDONLY); if (cgroup_fd < 0) { - p_err("can't open cgroup %s\n", argv[1]); + p_err("can't open cgroup %s", argv[1]); goto exit; } @@ -171,19 +171,19 @@ static int do_attach(int argc, char **argv) int i; if (argc < 4) { - p_err("too few parameters for cgroup attach\n"); + p_err("too few parameters for cgroup attach"); goto exit; } cgroup_fd = open(argv[0], O_RDONLY); if (cgroup_fd < 0) { - p_err("can't open cgroup %s\n", argv[1]); + p_err("can't open cgroup %s", argv[1]); goto exit; } attach_type = parse_attach_type(argv[1]); if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type\n"); + p_err("invalid attach type"); goto exit_cgroup; } @@ -199,7 +199,7 @@ static int do_attach(int argc, char **argv) } else if (is_prefix(argv[i], "override")) { attach_flags |= BPF_F_ALLOW_OVERRIDE; } else { - p_err("unknown option: %s\n", argv[i]); + p_err("unknown option: %s", argv[i]); goto exit_cgroup; } } @@ -229,13 +229,13 @@ static int do_detach(int argc, char **argv) int ret = -1; if (argc < 4) { - p_err("too few parameters for cgroup detach\n"); + p_err("too few parameters for cgroup detach"); goto exit; } cgroup_fd = open(argv[0], O_RDONLY); if (cgroup_fd < 0) { - p_err("can't open cgroup %s\n", argv[1]); + p_err("can't open cgroup %s", argv[1]); goto exit; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5577960bffe4..c6a28be4665c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -813,12 +813,12 @@ static int do_load(int argc, char **argv) usage(); if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { - p_err("failed to load program\n"); + p_err("failed to load program"); return -1; } if (do_pin_fd(prog_fd, argv[1])) { - p_err("failed to pin program\n"); + p_err("failed to pin program"); return -1; } -- cgit v1.2.3 From 5c65a2fdb36b21c3f6ac534c5e12f81cf67726b1 Mon Sep 17 00:00:00 2001 From: Elad Wexler Date: Sat, 30 Dec 2017 18:11:51 +0200 Subject: tools: usb: usbip_device_driver: prefer 'unsigned int' to 'unsigned' Fixup a coding style issue Signed-off-by: Elad Wexler Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_device_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c index e059b7d1ec5b..b2dac82975af 100644 --- a/tools/usb/usbip/libsrc/usbip_device_driver.c +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -77,7 +77,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) const char *path, *name; char filepath[SYSFS_PATH_MAX]; struct usb_device_descriptor descr; - unsigned i; + unsigned int i; FILE *fd = NULL; struct udev_device *plat; const char *speed; -- cgit v1.2.3 From f7a5d7b3ab3c4865af85b431e1cd18b66b6c3b0e Mon Sep 17 00:00:00 2001 From: Elad Wexler Date: Sat, 30 Dec 2017 18:01:39 +0200 Subject: tools: usb: usbip: fix fd leak in case of 'fread' failure Fix possible resource leak: fd Signed-off-by: Elad Wexler Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/usbip_device_driver.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c index b2dac82975af..ec3a0b794f15 100644 --- a/tools/usb/usbip/libsrc/usbip_device_driver.c +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -92,7 +92,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) return -1; ret = fread((char *) &descr, sizeof(descr), 1, fd); if (ret < 0) - return -1; + goto err; fclose(fd); copy_descr_attr(dev, &descr, bDeviceClass); @@ -124,6 +124,9 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) name = udev_device_get_sysname(plat); strncpy(dev->busid, name, SYSFS_BUS_ID_SIZE); return 0; +err: + fclose(fd); + return -1; } static int is_my_device(struct udev_device *dev) -- cgit v1.2.3 From 3ced9b600234e6bb13cb25654ce44c4896485cf2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 4 Jan 2018 13:55:04 -0800 Subject: tools/bpf: add a bpf selftest for stacktrace Added a bpf selftest in test_progs at tools directory for stacktrace. The test will populate a hashtable map and a stacktrace map at the same time with the same key, stackid. The user space will compare both maps, using BPF_MAP_LOOKUP_ELEM command and BPF_MAP_GET_NEXT_KEY command, to ensure that both have the same set of keys. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 127 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_stacktrace_map.c | 62 +++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_stacktrace_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1304753d29ea..a8aa7e251c8e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o test_xdp_noinline.o + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 09087ab12293..b549308abd19 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -837,6 +837,132 @@ static void test_tp_attach_query(void) free(query); } +static int compare_map_keys(int map1_fd, int map2_fd) +{ + __u32 key, next_key; + char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)]; + int err; + + err = bpf_map_get_next_key(map1_fd, NULL, &key); + if (err) + return err; + err = bpf_map_lookup_elem(map2_fd, &key, val_buf); + if (err) + return err; + + while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) { + err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf); + if (err) + return err; + + key = next_key; + } + if (errno != ENOENT) + return -1; + + return 0; +} + +static void test_stacktrace_map() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int bytes, efd, err, pmu_fd, prog_fd; + struct perf_event_attr attr = {}; + __u32 key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto out; + + /* Get the ID for the sched/sched_switch tracepoint */ + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + /* Open the perf event and attach bpf progrram */ + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (CHECK(control_map_fd < 0, "bpf_find_map control_map", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + ; /* fall through */ + +disable_pmu: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + +close_pmu: + close(pmu_fd); + +close_prog: + bpf_object__close(obj); + +out: + return; +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -852,6 +978,7 @@ int main(void) test_pkt_md_access(); test_obj_name(); test_tp_attach_query(); + test_stacktrace_map(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c new file mode 100644 index 000000000000..76d85c5d08bd --- /dev/null +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 10000, +}; + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + __u32 key = 0, val = 0, *value_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(ctx, &stackmap, 0); + if ((int)key >= 0) + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ -- cgit v1.2.3 From 607bd2e502f5528e1aef94e4d6f8252ccc4dbd6a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 7 Jan 2018 12:45:16 +0200 Subject: selftests: fib_tests: Add test cases for IPv4/IPv6 FIB Add test cases to check that IPv4 and IPv6 react to a netdev being unregistered as expected. Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/fib_tests.sh | 146 +++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100755 tools/testing/selftests/net/fib_tests.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 500c74db746c..d7c30d366935 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,6 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS += fib_tests.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh new file mode 100755 index 000000000000..767d2ab2385d --- /dev/null +++ b/tools/testing/selftests/net/fib_tests.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB behavior in response to +# different events. + +ret=0 + +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + +netns_create() +{ + local testns=$1 + + ip netns add $testns + ip netns exec $testns ip link set dev lo up +} + +fib_unreg_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_unreg_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + # In IPv6 we do not flush the entire multipath route. + check_err $? + + ip netns exec testns ip link del dev dummy1 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_unreg_test() +{ + echo "Running netdev unregister tests" + + fib_unreg_unicast_test + fib_unreg_multipath_test +} + +fib_test() +{ + fib_unreg_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +ip route help 2>&1 | grep -q fibmatch +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fibmatch" + exit 0 +fi + +fib_test + +exit $ret -- cgit v1.2.3 From 5adb7683b48e35bc863a37e026dc6038ded30560 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 7 Jan 2018 12:45:17 +0200 Subject: selftests: fib_tests: Add test cases for netdev down Check that IPv4 and IPv6 react the same when a netdev is being put administratively down. Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 141 +++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 767d2ab2385d..25ba74f8a37e 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -120,9 +120,150 @@ fib_unreg_test() fib_unreg_multipath_test } +fib_down_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_down_multipath_test_do() +{ + local down_dev=$1 + local up_dev=$2 + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $down_dev &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $down_dev &> /dev/null + check_fail $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $up_dev &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $up_dev &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? +} + +fib_down_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + fib_down_multipath_test_do "dummy0" "dummy1" + + ip netns exec testns ip link set dev dummy0 up + check_err $? + ip netns exec testns ip link set dev dummy1 down + check_err $? + + fib_down_multipath_test_do "dummy1" "dummy0" + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy1 + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_down_test() +{ + echo "Running netdev down tests" + + fib_down_unicast_test + fib_down_multipath_test +} + fib_test() { fib_unreg_test + fib_down_test } if [ "$(id -u)" -ne 0 ];then -- cgit v1.2.3 From 82e45b6fd29246f36ff8064e74d412c11feaab23 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 7 Jan 2018 12:45:18 +0200 Subject: selftests: fib_tests: Add test cases for netdev carrier change Check that IPv4 and IPv6 react the same when the carrier of a netdev is toggled. Local routes should not be affected by this, whereas unicast routes should. Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 142 +++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 25ba74f8a37e..a9154eefb2e2 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -260,10 +260,152 @@ fib_down_test() fib_down_multipath_test } +fib_carrier_local_test() +{ + ret=0 + + # Local routes should not be affected when carrier changes. + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: local route carrier test" + return 1 + fi + echo "PASS: local route carrier test" +} + +fib_carrier_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route carrier test" + return 1 + fi + echo "PASS: unicast route carrier test" +} + +fib_carrier_test() +{ + echo "Running netdev carrier change tests" + + fib_carrier_local_test + fib_carrier_unicast_test +} + fib_test() { fib_unreg_test fib_down_test + fib_carrier_test } if [ "$(id -u)" -ne 0 ];then -- cgit v1.2.3 From e8d5134833006a46fcbefc5f4a84d0b62bd520e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:54:05 +0100 Subject: memremap: change devm_memremap_pages interface to use struct dev_pagemap This new interface is similar to how struct device (and many others) work. The caller initializes a 'struct dev_pagemap' as required and calls 'devm_memremap_pages'. This allows the pagemap structure to be embedded in another structure and thus container_of can be used. In this way application specific members can be stored in a containing struct. This will be used by the P2P infrastructure and HMM could probably be cleaned up to use it as well (instead of having it's own, similar 'hmm_devmem_pages_create' function). Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/iomap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..ff9d3a5825e1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t offset = res->start; + resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) return nfit_res->buf + offset - nfit_res->res.start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, pgmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); -- cgit v1.2.3 From 912ec316686df352028afb6efec59e47a958a24d Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 5 Jan 2018 17:31:18 +0100 Subject: selftests: seccomp: fix compile error seccomp_bpf aarch64-linux-gnu-gcc -Wl,-no-as-needed -Wall -lpthread seccomp_bpf.c -o seccomp_bpf seccomp_bpf.c: In function 'tracer_ptrace': seccomp_bpf.c:1720:12: error: '__NR_open' undeclared (first use in this function) if (nr == __NR_open) ^~~~~~~~~ seccomp_bpf.c:1720:12: note: each undeclared identifier is reported only once for each function it appears in In file included from seccomp_bpf.c:48:0: seccomp_bpf.c: In function 'TRACE_syscall_ptrace_syscall_dropped': seccomp_bpf.c:1795:39: error: '__NR_open' undeclared (first use in this function) EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); ^ open(2) is a legacy syscall, replaced with openat(2) since 2.6.16. Thus new architectures in the kernel, such as arm64, don't implement these legacy syscalls. Fixes: a33b2d0359a0 ("selftests/seccomp: Add tests for basic ptrace actions") Signed-off-by: Anders Roxell Tested-by: Naresh Kamboju Cc: stable@vger.kernel.org Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf634e2dd..0b457e8e0f0c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1717,7 +1717,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); - if (nr == __NR_open) + if (nr == __NR_openat) change_syscall(_metadata, tracee, -1); } @@ -1792,7 +1792,7 @@ TEST_F(TRACE_syscall, ptrace_syscall_dropped) true); /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_allowed) -- cgit v1.2.3 From 4c1baad223906943b595a887305f2e8124821dad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Jan 2018 17:26:24 +0100 Subject: kselftest: fix OOM in memory compaction test Running the compaction_test sometimes results in out-of-memory failures. When I debugged this, it turned out that the code to reset the number of hugepages to the initial value is simply broken since we write into an open sysctl file descriptor multiple times without seeking back to the start. Adding the lseek here fixes the problem. Cc: stable@vger.kernel.org Reported-by: Naresh Kamboju Link: https://bugs.linaro.org/show_bug.cgi?id=3145 Signed-off-by: Arnd Bergmann Signed-off-by: Shuah Khan --- tools/testing/selftests/vm/compaction_test.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index a65b016d4c13..1097f04e4d80 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -137,6 +137,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + lseek(fd, 0, SEEK_SET); + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); -- cgit v1.2.3 From 2519c35fac12f7e17ed79b1e01aa6edc161823af Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 11 Jan 2018 13:46:07 +0000 Subject: tools/testing: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Shuah Khan --- tools/testing/selftests/nsfs/pidns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c index 1182d4e437a2..e0d86e1668c0 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/nsfs/pidns.c @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) return pr_err("NS_GET_PARENT returned a wrong namespace"); if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM) - return pr_err("Don't get EPERM");; + return pr_err("Don't get EPERM"); } kill(pid, SIGKILL); -- cgit v1.2.3 From 170b555777cf1329419b96f952e2c320c93ecc40 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 11 Jan 2018 10:28:35 -0700 Subject: selftests: media_tests: Fix Makefile 'clean' target warning Remove 'clean' target and change TEST_PROGS to TEST_GEN_PROGS so the common lib.mk 'clean' target clean these generated files. TEST_PROGS is for shell scripts and not for generated test executables. Signed-off-by: Shuah Khan --- tools/testing/selftests/media_tests/Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index be5bd4ffb895..c82cec2497de 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := media_device_test media_device_open video_device_test -all: $(TEST_PROGS) +TEST_GEN_PROGS := media_device_test media_device_open video_device_test +all: $(TEST_GEN_PROGS) include ../lib.mk - -clean: - rm -fr media_device_test media_device_open video_device_test -- cgit v1.2.3 From 6ee7bcc46ea796e158c628dfff9836420281fc6a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 9 Jan 2018 17:08:10 -0700 Subject: selftests: gen_kselftest_tar.h: Add SPDX license identifier Replace GPL license statement with SPDX GPL-2.0 license identifier. Signed-off-by: Shuah Khan --- tools/testing/selftests/gen_kselftest_tar.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh index 17d5bd0c0936..a27e2eec3586 100755 --- a/tools/testing/selftests/gen_kselftest_tar.sh +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -1,13 +1,11 @@ #!/bin/bash # +# SPDX-License-Identifier: GPL-2.0 # gen_kselftest_tar # Generate kselftest tarball # Author: Shuah Khan # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - # main main() { -- cgit v1.2.3 From 4b461dc238cc088417efba23e4e9c67bd5dcca74 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 9 Jan 2018 18:36:55 -0700 Subject: selftests: kselftest_install.sh: Add SPDX license identifier Replace GPL license statement with SPDX GPL-2.0 license identifier. Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_install.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index 1555fbdb08da..ec304463883c 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -1,13 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Kselftest Install # Install kselftest tests # Author: Shuah Khan # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - install_loc=`pwd` main() -- cgit v1.2.3 From 7c466b97ccc6935c0a668cfefcda874c7560c3dc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 9 Jan 2018 19:22:31 -0700 Subject: selftests: kselftest.h: Add SPDX license identifier Replace GPL license statement with SPDX GPL-2.0 license identifier. Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 1ae565ed9bf0..1a52b03962a3 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * kselftest.h: kselftest framework return codes to include from * selftests. @@ -5,7 +6,6 @@ * Copyright (c) 2014 Shuah Khan * Copyright (c) 2014 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ #ifndef __KSELFTEST_H #define __KSELFTEST_H -- cgit v1.2.3 From a4f0e38e1d0d87849e22cb2f9c3f0bbe87729f5d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 9 Jan 2018 18:38:55 -0700 Subject: selftests: media_tests: Add SPDX license identifier Replace GPL license statement with SPDX GPL-2.0 license identifier. Signed-off-by: Shuah Khan --- tools/testing/selftests/media_tests/media_device_open.c | 3 ++- tools/testing/selftests/media_tests/media_device_test.c | 3 ++- tools/testing/selftests/media_tests/video_device_test.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c index 44343c091a20..a5ce5434bafd 100644 --- a/tools/testing/selftests/media_tests/media_device_open.c +++ b/tools/testing/selftests/media_tests/media_device_open.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_open.c - Media Controller Device Open Test * * Copyright (c) 2016 Shuah Khan * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c index 5d49943e77d0..421a367e4bb3 100644 --- a/tools/testing/selftests/media_tests/media_device_test.c +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_test.c - Media Controller Device ioctl loop Test * * Copyright (c) 2016 Shuah Khan * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c index 66d419c28653..0f6aef2e2593 100644 --- a/tools/testing/selftests/media_tests/video_device_test.c +++ b/tools/testing/selftests/media_tests/video_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * video_device_test - Video Device Test * * Copyright (c) 2016 Shuah Khan * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* -- cgit v1.2.3 From a38845729ea3985db5d2544ec3ef3dc8f6313a27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jan 2018 20:29:09 -0800 Subject: bpf: offload: add map offload infrastructure BPF map offload follow similar path to program offload. At creation time users may specify ifindex of the device on which they want to create the map. Map will be validated by the kernel's .map_alloc_check callback and device driver will be called for the actual allocation. Map will have an empty set of operations associated with it (save for alloc and free callbacks). The real device callbacks are kept in map->offload->dev_ops because they have slightly different signatures. Map operations are called in process context so the driver may communicate with HW freely, msleep(), wait() etc. Map alloc and free callbacks are muxed via existing .ndo_bpf, and are always called with rtnl lock held. Maps and programs are guaranteed to be destroyed before .ndo_uninit (i.e. before unregister_netdev() returns). Map callbacks are invoked with bpf_devs_lock *read* locked, drivers must take care of exclusive locking if necessary. All offload-specific branches are marked with unlikely() (through bpf_map_is_dev_bound()), given that branch penalty will be negligible compared to IO anyway, and we don't want to penalize SW path unnecessarily. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e8c60acfa32..69f96af4a569 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -245,6 +245,7 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From b394d468e7d75637e682a9be4a1181b27186c593 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 14:22:38 -0800 Subject: usercopy: Enhance and rename report_usercopy() In preparation for refactoring the usercopy checks to pass offset to the hardened usercopy report, this renames report_usercopy() to the more accurate usercopy_abort(), marks it as noreturn because it is, adds a hopefully helpful comment for anyone investigating such reports, makes the function available to the slab allocators, and adds new "detail" and "offset" arguments. Signed-off-by: Kees Cook --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9b341584eb1b..ae39444896d4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "__reiserfs_panic", "lbug_with_loc", "fortify_panic", + "usercopy_abort", }; if (func->bind == STB_WEAK) -- cgit v1.2.3 From 45e5e1212af4633aa76db387ccaac8b41c8a7b6c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 15 Jan 2018 19:16:15 +0000 Subject: bpftool: recognize BPF_PROG_TYPE_CGROUP_DEVICE programs Bpftool doesn't recognize BPF_PROG_TYPE_CGROUP_DEVICE programs, so the prog show command prints the numeric type value: $ bpftool prog show 1: type 15 name bpf_prog1 tag ac9f93dbfd6d9b74 loaded_at Jan 15/07:58 uid 0 xlated 96B jited 105B memlock 4096B This patch defines the corresponding textual representation: $ bpftool prog show 1: cgroup_device name bpf_prog1 tag ac9f93dbfd6d9b74 loaded_at Jan 15/07:58 uid 0 xlated 96B jited 105B memlock 4096B Signed-off-by: Roman Gushchin Cc: Jakub Kicinski Cc: Quentin Monnet Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c6a28be4665c..099e21cf1b5c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -66,6 +66,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) -- cgit v1.2.3 From 5e46664703b364434a2cbda3e6988fc24ae0ced5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 14 Jan 2018 22:50:07 +0900 Subject: selftest: ftrace: Fix to pick text symbols for kprobes Fix to pick text symbols for multiple kprobe testcase. kallsyms shows text symbols with " t " or " T " but current testcase picks all symbols including "t", so it picks data symbols if it includes 't' (e.g. "str"). This fixes it to find symbol lines with " t " or " T " (including spaces). Signed-off-by: Masami Hiramatsu Reported-by: Russell King Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index bb16cf91f1b5..e297bd7a2e79 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -12,8 +12,8 @@ case `uname -m` in *) OFFS=0;; esac -echo "Setup up to 256 kprobes" -grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +echo "Setup up kprobes on first 256 text symbols" +grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: echo 1 > events/kprobes/enable -- cgit v1.2.3 From 9739cee6918b222af6f75869ed33127a8b1c3d77 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 14 Jan 2018 22:50:36 +0900 Subject: selftest: ftrace: Fix to add 256 kprobe events correctly Current multiple-kprobe testcase only tries to add kprobe events on first 256 text symbols. However kprobes fails to probe on some text symbols (like blacklisted symbols). Thus in the worst case, the test can not add any kprobe events. To avoid that, continue to try adding kprobe events until 256 events. Also it confirms the number of registered kprobe events. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- .../ftrace/test.d/kprobe/multiple_kprobes.tc | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index e297bd7a2e79..ce361b9d62cf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -12,9 +12,24 @@ case `uname -m` in *) OFFS=0;; esac -echo "Setup up kprobes on first 256 text symbols" +if [ -d events/kprobes ]; then + echo 0 > events/kprobes/enable + echo > kprobe_events +fi + +N=0 +echo "Setup up kprobes on first available 256 text symbols" grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ -head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: +while read i; do + echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||: + test $N -eq 256 && break +done + +L=`wc -l kprobe_events` +if [ $L -ne $N ]; then + echo "The number of kprobes events ($L) is not $N" + exit_fail +fi echo 1 > events/kprobes/enable echo 0 > events/kprobes/enable -- cgit v1.2.3 From 14f1889fd4d7ea1f496e90ae055ecc086575a8cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 17:10:39 +1100 Subject: selftests: Fix loss of test output in run_kselftests.sh Commit fbcab13d2e25 ("selftests: silence test output by default") changed the run_tests logic as well as the logic to generate run_kselftests.sh to redirect test output away from the console. As discussed on the list and at kernel summit, this is not a desirable default as it means in order to debug a failure the console output is not sufficient, you also need access to the test machine to get the full test logs. Additionally it's impolite to write directly to /tmp/$TEST_NAME on shared systems. The change to the run_tests logic was reverted in commit a323335e62cc ("selftests: lib.mk: print individual test results to console by default"), and instead a summary option was added so that quiet output could be requested. However the change to run_kselftests.sh was left as-is. This commit applies the same logic to the run_kselftests.sh code, ie. the script now takes a "--summary" option which suppresses the output, but shows all output by default. Additionally instead of writing to /tmp/$TEST_NAME the output is redirected to the directory where the generated test script is located. Fixes: fbcab13d2e25 ("selftests: silence test output by default") Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 9 ++++++++- tools/testing/selftests/lib.mk | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index eaf599dc2137..7442dfb73b7f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -116,8 +116,15 @@ ifdef INSTALL_PATH @# Ask all targets to emit their test scripts echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) + echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) + echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) + echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT) + echo "else" >> $(ALL_SCRIPT) + echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) + echo "fi" >> $(ALL_SCRIPT) for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..7de482a0519d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -77,7 +77,7 @@ endif define EMIT_TESTS @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ done; endef -- cgit v1.2.3 From 39b72ccdb278f53735af1a378e67e6110e3210ad Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Tue, 16 Jan 2018 15:51:47 -0800 Subject: tools: bpftool: add -DPACKAGE when including bfd.h bfd.h is requiring including of config.h except when PACKAGE or PACKAGE_VERSION are defined. /* PR 14072: Ensure that config.h is included first. */ #if !defined PACKAGE && !defined PACKAGE_VERSION #error config.h must be included before this header #endif This check has been introduced since May-2012. It doesn't show up in bfd.h on some Linux distribution, probably because distributions have remove it when building the package. However, sometimes the user might just build libfd from source code then link bpftool against it. For this case, bfd.h will be original that we need to define PACKAGE or PACKAGE_VERSION. Acked-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 2 +- tools/build/feature/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 2237bc43f71c..26901ec87361 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -39,7 +39,7 @@ CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' LIBS = -lelf -lbfd -lopcodes $(LIBBPF) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 17f2c73fff8b..bc715f6ac320 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -190,7 +190,7 @@ $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl $(OUTPUT)test-disassembler-four-args.bin: - $(BUILD) -lbfd -lopcodes + $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes $(OUTPUT)test-liberty.bin: $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -- cgit v1.2.3 From d77be68955475fc2321e73fe006240248f2f8fef Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 16 Jan 2018 15:51:48 -0800 Subject: libbpf: fix string comparison for guessing eBPF program type libbpf is able to deduce the type of a program from the name of the ELF section in which it is located. However, the comparison is made on the first n characters, n being determined with sizeof() applied to the reference string (e.g. "xdp"). When such section names are supposed to receive a suffix separated with a slash (e.g. "kprobe/"), using sizeof() takes the final NUL character of the reference string into account, which implies that both strings must be equal. Instead, the desired behaviour would consist in taking the length of the string, *without* accounting for the ending NUL character, and to make sure the reference string is a prefix to the ELF section name. Subtract 1 to the total size of the string for obtaining the length for the comparison. Fixes: 583c90097f72 ("libbpf: add ability to guess program type based on section name") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c4b7cabcf2..30c776375118 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1803,7 +1803,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -#define BPF_PROG_SEC(string, type) { string, sizeof(string), type } +#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type } static const struct { const char *sec; size_t len; -- cgit v1.2.3 From 7d386c624980e476612490df52eaa86c8e066edc Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 17 Jan 2018 00:20:30 +0100 Subject: libbpf: install the header file libbpf.h It seems like an oversight not to install the header file for libbpf, given the libbpf.so + libbpf.a files are installed. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 8ed43ae9db9b..54370654c708 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -192,7 +192,8 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ - $(call do_install,bpf.h,$(prefix)/include/bpf,644) + $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); install: install_lib -- cgit v1.2.3 From 63c859101ec32cbc8fa5b708c7f17de63b15e56e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 17 Jan 2018 00:20:35 +0100 Subject: libbpf: cleanup Makefile, remove unused elements The plugin_dir_SQ variable is not used, remove it. The function update_dir is also unused, remove it. The variable $VERSION_FILES is empty, remove it. These all originates from the introduction of the Makefile, and is likely a copy paste from tools/lib/traceevent/Makefile. Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 54370654c708..8e15e48cb8f8 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -93,7 +93,6 @@ export prefix libdir src obj # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) LIB_FILE = libbpf.a libbpf.so @@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: fixdep $(VERSION_FILES) all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) @@ -169,16 +168,6 @@ $(OUTPUT)libbpf.so: $(BPF_IN) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -204,7 +193,7 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ + $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ $(RM) LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -- cgit v1.2.3 From 7110d80d53f472956420cd05a6297f49b558b674 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 17 Jan 2018 00:20:40 +0100 Subject: libbpf: Makefile set specified permission mode The third parameter to do_install was not used by $(INSTALL) command. Fix this by only setting the -m option when the third parameter is supplied. The use of a third parameter was introduced in commit eb54e522a000 ("bpf: install libbpf headers on 'make install'"). Without this change, the header files are install as executables files (755). Fixes: eb54e522a000 ("bpf: install libbpf headers on 'make install'") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 8e15e48cb8f8..83714ca1f22b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -172,7 +172,7 @@ define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 '$(DESTDIR_SQ)$2' + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' endef install_lib: all_cmd -- cgit v1.2.3 From e65935969d0fac9df28d9c49bdbab5d8d8286a20 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Tue, 16 Jan 2018 16:05:21 -0800 Subject: tools: bpftool: improve architecture detection by using ifindex The current architecture detection method in bpftool is designed for host case. For offload case, we can't use the architecture of "bpftool" itself. Instead, we could call the existing "ifindex_to_name_ns" to get DEVNAME, then read pci id from /sys/class/dev/DEVNAME/device/vendor, finally we map vendor id to bfd arch name which will finally be used to select bfd backend for the disassembler. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 72 ++++++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/jit_disasm.c | 16 +++++++++- tools/bpf/bpftool/main.h | 5 ++- tools/bpf/bpftool/prog.c | 12 ++++++- 4 files changed, 102 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6601c95a9258..0b482c0070e0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -34,6 +34,7 @@ /* Author: Jakub Kicinski */ #include +#include #include #include #include @@ -433,6 +434,77 @@ ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) return if_indextoname(ifindex, buf); } +static int read_sysfs_hex_int(char *path) +{ + char vendor_id_buf[8]; + int len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("Can't open %s: %s", path, strerror(errno)); + return -1; + } + + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); + close(fd); + if (len < 0) { + p_err("Can't read %s: %s", path, strerror(errno)); + return -1; + } + if (len >= (int)sizeof(vendor_id_buf)) { + p_err("Value in %s too long", path); + return -1; + } + + vendor_id_buf[len] = 0; + + return strtol(vendor_id_buf, NULL, 0); +} + +static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) +{ + char full_path[64]; + + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", + devname, entry_name); + + return read_sysfs_hex_int(full_path); +} + +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +{ + char devname[IF_NAMESIZE]; + int vendor_id; + int device_id; + + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { + p_err("Can't get net device name for ifindex %d: %s", ifindex, + strerror(errno)); + return NULL; + } + + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); + if (vendor_id < 0) { + p_err("Can't get device vendor id for %s", devname); + return NULL; + } + + switch (vendor_id) { + case 0x19ee: + device_id = read_sysfs_netdev_hex_int(devname, "device"); + if (device_id != 0x4000 && + device_id != 0x6000 && + device_id != 0x6003) + p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + return "NFP-6xxx"; + default: + p_err("Can't get bfd arch name for device vendor id 0x%04x", + vendor_id); + return NULL; + } +} + void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) { char name[IF_NAMESIZE]; diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 57d32e8a1391..87439320ef70 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...) return 0; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch) { disassembler_ftype disassemble; struct disassemble_info info; @@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) else init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + + /* Update architecture info for offload. */ + if (arch) { + const bfd_arch_info_type *inf = bfd_scan_arch(arch); + + if (inf) { + bfdf->arch_info = inf; + } else { + p_err("No libfd support for %s", arch); + return; + } + } + info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); info.buffer = image; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 65b526fe6e7e..b8e9584d6246 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -121,7 +121,10 @@ int do_cgroup(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch); void print_hex_data_json(uint8_t *data, size_t len); +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); + #endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 099e21cf1b5c..e8e2baaf93c2 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -776,7 +776,17 @@ static int do_dump(int argc, char **argv) } } else { if (member_len == &info.jited_prog_len) { - disasm_print_insn(buf, *member_len, opcodes); + const char *name = NULL; + + if (info.ifindex) { + name = ifindex_to_bfd_name_ns(info.ifindex, + info.netns_dev, + info.netns_ino); + if (!name) + goto err_free; + } + + disasm_print_insn(buf, *member_len, opcodes, name); } else { kernel_syms_load(&dd); if (json_output) -- cgit v1.2.3 From b223e3b4e03a13739ab462560b791a4c692fd86e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Jan 2018 12:35:21 +0300 Subject: tools/bpf_jit_disasm: silence a static checker warning There is a static checker warning that "proglen" has an upper bound but no lower bound. The allocation will just fail harmlessly so it's not a big deal. Signed-off-by: Dan Carpenter Signed-off-by: Daniel Borkmann --- tools/bpf/bpf_jit_disasm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 30044bc4f389..58c2bab4ef6e 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -172,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, { char *ptr, *pptr, *tmp; off_t off = 0; - int ret, flen, proglen, pass, ulen = 0; + unsigned int proglen; + int ret, flen, pass, ulen = 0; regmatch_t pmatch[1]; unsigned long base; regex_t regex; @@ -199,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", &flen, &proglen, &pass, &base); if (ret != 4) { regfree(®ex); @@ -239,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } assert(ulen == proglen); - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", proglen, pass, flen); printf("%lx + :\n", base); -- cgit v1.2.3 From e7b2823a582a5bca5ee47644f448e317178e8824 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 18 Jan 2018 17:49:08 +0100 Subject: bpf: Sync kernel ABI header with tooling header Update tools/include/uapi/linux/bpf.h to bring it in sync with include/uapi/linux/bpf.h. The listed commits forgot to update it. Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs") Fixes: f19397a5c656 ("bpf: Add access to snd_cwnd and others in sock_ops") Fixes: 06ef0ccb5a36 ("bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 69f96af4a569..7c2259e8bc54 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -900,6 +900,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { @@ -956,6 +959,12 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. @@ -1010,7 +1019,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; -- cgit v1.2.3 From 111e6b45315c8d13658f23885b30eb9df3ea2914 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 17 Jan 2018 16:52:03 -0800 Subject: selftests/bpf: make test_verifier run most programs to improve test coverage make test_verifier run all successfully loaded programs on 64-byte zero initialized data. For clsbpf and xdp it means empty 64-byte packet. For lwt and socket_filters it's 64-byte packet where skb->data points after L2. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 50 ++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 960179882a1c..6c22edb1f006 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -49,6 +50,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 4 +#define POINTER_VALUE 0xcafe4all +#define TEST_DATA_LEN 64 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -62,6 +65,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; + uint32_t retval; enum { UNDEF, ACCEPT, @@ -95,6 +99,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = -3, }, { "unreachable", @@ -210,6 +215,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "test8 ld_imm64", @@ -517,6 +523,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, + .retval = POINTER_VALUE, }, { "check valid spill/fill, skb mark", @@ -803,6 +810,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, + .retval = -ENOENT, }, { "jump test 4", @@ -1823,6 +1831,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 0xfaceb00c, }, { "PTR_TO_STACK store/load - bad alignment on off", @@ -1881,6 +1890,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, }, { "unpriv: add const to pointer", @@ -2054,6 +2064,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -2818,6 +2829,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test12 (and, good access)", @@ -2842,6 +2854,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test13 (branches, good access)", @@ -2872,6 +2885,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", @@ -2895,6 +2909,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test15 (spill with xadd)", @@ -3181,6 +3196,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test28 (marking on <=, bad access)", @@ -5798,6 +5814,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -6166,6 +6183,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42 /* ultimate return value */, }, { "ld_ind: check calling conv, r1", @@ -6237,6 +6255,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check bpf_perf_event_data->sample_period byte load permitted", @@ -7224,6 +7243,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7244,6 +7264,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7685,6 +7706,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = TEST_DATA_LEN, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -8705,6 +8727,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "function calls to other bpf functions are allowed for root only", .result_unpriv = REJECT, .result = ACCEPT, + .retval = 1, }, { "calls: overlapping caller/callee", @@ -8900,6 +8923,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_ACT, .result = ACCEPT, + .retval = TEST_DATA_LEN, }, { "calls: callee using args1", @@ -8912,6 +8936,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "allowed for root only", .result_unpriv = REJECT, .result = ACCEPT, + .retval = POINTER_VALUE, }, { "calls: callee using wrong args2", @@ -8942,6 +8967,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "allowed for root only", .result_unpriv = REJECT, .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, }, { "calls: callee changing pkt pointers", @@ -8990,6 +9016,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN, }, { "calls: calls with stack arith", @@ -9008,6 +9035,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42, }, { "calls: calls with misaligned stack access", @@ -9041,6 +9069,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 43, }, { "calls: calls control flow, jump test 2", @@ -9533,6 +9562,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_XDP, .result = ACCEPT, + .retval = 42, }, { "calls: write into callee stack frame", @@ -10144,6 +10174,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, }, { "calls: pkt_ptr spill into caller stack 2", @@ -10209,6 +10240,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 1, }, { "calls: pkt_ptr spill into caller stack 4", @@ -10242,6 +10274,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 1, }, { "calls: pkt_ptr spill into caller stack 5", @@ -10650,10 +10683,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); + char data_in[TEST_DATA_LEN] = {}; int prog_type = test->prog_type; int map_fds[MAX_NR_MAPS]; const char *expected_err; - int i; + uint32_t retval; + int i, err; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -10696,6 +10731,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (fd_prog >= 0) { + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), + NULL, NULL, &retval, NULL); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error\n"); + goto fail_log; + } + if (!err && retval != test->retval && + test->retval != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, test->retval); + goto fail_log; + } + } (*passes)++; printf("OK%s\n", reject_from_alignment ? " (NOTE: reject due to unknown alignment)" : ""); -- cgit v1.2.3 From 52775b33bb5072fbc07b02c0cf4fe8da1f7ee7cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:28 -0800 Subject: bpf: offload: report device information about offloaded maps Tell user space about device on which the map was created. Unfortunate reality of user ABI makes sharing this code with program offload difficult but the information is the same. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7c2259e8bc54..af1f49ad8b88 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit v1.2.3 From 064a07cba2919bcfbadf9edf5c26c740e69fa585 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:29 -0800 Subject: tools: bpftool: report device information for offloaded maps Print the information about device on which map is created. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 8d7db9d6b9cd..a152c1a5c94c 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -428,6 +428,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "flags"); jsonw_printf(json_wtr, "%#x", info->map_flags); + + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + jsonw_uint_field(json_wtr, "bytes_key", info->key_size); jsonw_uint_field(json_wtr, "bytes_value", info->value_size); jsonw_uint_field(json_wtr, "max_entries", info->max_entries); @@ -469,7 +472,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (*info->name) printf("name %s ", info->name); - printf("flags 0x%x\n", info->map_flags); + printf("flags 0x%x", info->map_flags); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("\n"); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); -- cgit v1.2.3 From 7fedbb7c5a7c4bda418bc1056c06c81db36e4299 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:31 -0800 Subject: selftest/bpf: extend the offload test with map checks Check map device information is reported correctly, and perform basic map operations. Check device destruction gets rid of the maps and map allocation failure path by telling netdevsim to reject map offload via DebugFS. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/sample_map_ret0.c | 34 +++++ tools/testing/selftests/bpf/test_offload.py | 206 +++++++++++++++++++++++--- 3 files changed, 218 insertions(+), 25 deletions(-) create mode 100644 tools/testing/selftests/bpf/sample_map_ret0.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a8aa7e251c8e..3a44b655d852 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 000000000000..0756303676ac --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. */ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index e3c750f17cb8..833b9c1ec450 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -20,6 +20,7 @@ import os import pprint import random import string +import struct import subprocess import time @@ -156,6 +157,14 @@ def bpftool_prog_list(expected=None, ns=""): (len(progs), expected)) return progs +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + def bpftool_prog_list_wait(expected=0, n_retry=20): for i in range(n_retry): nprogs = len(bpftool_prog_list()) @@ -164,6 +173,14 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + def ip(args, force=False, JSON=True, ns="", fail=True): if force: args = "-force " + args @@ -193,6 +210,26 @@ def mknetns(n_retry=10): return name return None +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + class DebugfsDir: """ Class for accessing DebugFS directories as a dictionary. @@ -311,13 +348,13 @@ class NetdevSim: return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) - def set_xdp(self, bpf, mode, force=False, fail=True): + def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True): return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), - force=force, fail=fail) + force=force, JSON=JSON, fail=fail) - def unset_xdp(self, mode, force=False, fail=True): + def unset_xdp(self, mode, force=False, JSON=True, fail=True): return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), - force=force, fail=fail) + force=force, JSON=JSON, fail=fail) def ip_link_show(self, xdp): _, link = ip("link show dev %s" % (self['ifname'])) @@ -390,12 +427,16 @@ class NetdevSim: ################################################################################ def clean_up(): + global files, netns, devs + for dev in devs: dev.remove() for f in files: cmd("rm -f %s" % (f)) for ns in netns: cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] def pin_prog(file_name, idx=0): progs = bpftool_prog_list(expected=(idx + 1)) @@ -405,16 +446,31 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) -def check_dev_info(other_ns, ns, pin_file=None, removed=False): - if removed: - bpftool_prog_list(expected=0) - ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - (err["error"])) - return - progs = bpftool_prog_list(expected=int(not removed), ns=ns) +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) prog = progs[0] fail("dev" not in prog.keys(), "Device parameters not reported") @@ -423,16 +479,17 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False): fail("ns_dev" not in dev.keys(), "Device parameters not reported") fail("ns_inode" not in dev.keys(), "Device parameters not reported") - if not removed and not other_ns: + if not other_ns: fail("ifname" not in dev.keys(), "Ifname not reported") fail(dev["ifname"] != sim["ifname"], "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) else: fail("ifname" in dev.keys(), "Ifname is reported for other ns") - if removed: - fail(dev["ifindex"] != 0, "Device perameters not zero on removed") - fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") - fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") # Parse command line parser = argparse.ArgumentParser() @@ -464,7 +521,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1: cmd("mount -t debugfs none /sys/kernel/debug") # Check samples are compiled -samples = ["sample_ret0.o"] +samples = ["sample_ret0.o", "sample_map_ret0.o"] for s in samples: ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) skip(ret != 0, "sample %s/%s not found, please compile it" % @@ -739,8 +796,9 @@ try: bpftool_prog_list_wait(expected=0) sim = NetdevSim() - sim.set_ethtool_tc_offloads(True) - sim.set_xdp(obj, "offload") + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON start_test("Test bpftool bound info reporting (own ns)...") check_dev_info(False, "") @@ -757,11 +815,111 @@ try: sim.set_ns("") check_dev_info(False, "") - pin_file, _ = pin_prog("/sys/fs/bpf/tmp") + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) sim.remove() start_test("Test bpftool bound info reporting (removed dev)...") - check_dev_info(True, "", pin_file=pin_file, removed=True) + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") print("%s: OK" % (os.path.basename(__file__))) -- cgit v1.2.3 From a55aaf6db0587e1a6e79dce8ada0e237d6068afb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Jan 2018 14:17:45 +0000 Subject: bpftool: recognize BPF_MAP_TYPE_CPUMAP maps Add BPF_MAP_TYPE_CPUMAP map type to the list of map type recognized by bpftool and define corresponding text representation. Signed-off-by: Roman Gushchin Cc: Quentin Monnet Cc: Jakub Kicinski Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a152c1a5c94c..f95fa67bb498 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -66,6 +66,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", }; static unsigned int get_possible_cpus(void) -- cgit v1.2.3 From b7bcc0bbb8acb640258bb451f1f9391737da48b1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 18 Jan 2018 17:36:24 -0700 Subject: selftests: bpf: update .gitignore with missing generated files Update .gitignore with missing generated files. Signed-off-by: Shuah Khan Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 541d9d7fad5a..1e09d77f1948 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -3,3 +3,10 @@ test_maps test_lru_map test_lpm_map test_tag +FEATURE-DUMP.libbpf +fixdep +test_align +test_dev_cgroup +test_progs +test_verifier_log +feature -- cgit v1.2.3 From 8c417dc15f9522672795981dcb63d9099ca6bd8c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 18 Jan 2018 15:08:51 -0800 Subject: tools/bpf: add a testcase for MAP_GET_NEXT_KEY command of LPM_TRIE map A test case is added in tools/testing/selftests/bpf/test_lpm_map.c for MAP_GET_NEXT_KEY command. A four node trie, which is described in kernel/bpf/lpm_trie.c, is built and the MAP_GET_NEXT_KEY results are checked. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_lpm_map.c | 122 +++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index f61480641b6e..081510853c6d 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -521,6 +521,126 @@ static void test_lpm_delete(void) close(map_fd); } +static void test_lpm_get_next_key(void) +{ + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && + errno == ENOENT); + + /* get and verify the first key, get the second one should fail. */ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -545,6 +665,8 @@ int main(void) test_lpm_delete(); + test_lpm_get_next_key(); + printf("test_lpm: OK\n"); return 0; } -- cgit v1.2.3 From 87c1793b1b7f34915e9e64cdb503efb281c769a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 20 Jan 2018 01:24:32 +0100 Subject: bpf: add couple of test cases for div/mod by zero Add couple of missing test cases for eBPF div/mod by zero to the new test_verifier prog runtime feature. Also one for an empty prog and only exit. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 87 +++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6c22edb1f006..efca10de64e9 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -101,6 +101,93 @@ static struct bpf_test tests[] = { .result = ACCEPT, .retval = -3, }, + { + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "empty prog", + .insns = { + }, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + }, { "unreachable", .insns = { -- cgit v1.2.3 From be68fb64f763b7b6ddb202e0a931f41ae62f71b0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 28 Nov 2017 14:06:39 +0530 Subject: selftest/powerpc: Add additional option to mmap_bench test This patch adds --pgfault and --iterations options to mmap_bench test. With --pgfault we touch every page mapped. This helps in measuring impact in the page fault path with a patch series. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- .../selftests/powerpc/benchmarks/mmap_bench.c | 53 ++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c index 8d084a2d6e74..7a0a462a2272 100644 --- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -7,17 +7,34 @@ #include #include #include +#include #include "utils.h" #define ITERATIONS 5000000 -#define MEMSIZE (128 * 1024 * 1024) +#define MEMSIZE (1UL << 27) +#define PAGE_SIZE (1UL << 16) +#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE) + +static int pg_fault; +static int iterations = ITERATIONS; + +static struct option options[] = { + { "pgfault", no_argument, &pg_fault, 1 }, + { "iterations", required_argument, 0, 'i' }, + { 0, }, +}; + +static void usage(void) +{ + printf("mmap_bench <--pgfault> <--iterations count>\n"); +} int test_mmap(void) { struct timespec ts_start, ts_end; - unsigned long i = ITERATIONS; + unsigned long i = iterations; clock_gettime(CLOCK_MONOTONIC, &ts_start); @@ -25,6 +42,11 @@ int test_mmap(void) char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); FAIL_IF(c == MAP_FAILED); + if (pg_fault) { + int count; + for (count = 0; count < CHUNK_COUNT; count++) + c[count << 16] = 'c'; + } munmap(c, MEMSIZE); } @@ -35,7 +57,32 @@ int test_mmap(void) return 0; } -int main(void) +int main(int argc, char *argv[]) { + signed char c; + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + case 'i': + iterations = atoi(optarg); + break; + default: + usage(); + exit(1); + } + } return test_harness(test_mmap, "mmap_bench"); } -- cgit v1.2.3 From 8d1915873d492b8e1f03bbcab527db62a8d49542 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 16 Oct 2017 16:04:02 +1100 Subject: selftests/powerpc: Add alignment handler selftest Add a selftest to exercise the powerpc alignment fault handler. Signed-off-by: Michael Neuling Signed-off-by: Andrew Donnellan [mpe: Add 32-bit support to the signal handler] Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/alignment/Makefile | 3 +- .../powerpc/alignment/alignment_handler.c | 491 +++++++++++++++++++++ 2 files changed, 493 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/alignment/alignment_handler.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 16b22004e75f..083a48a008b4 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,4 +1,5 @@ -TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned +TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \ + paste_last_unaligned alignment_handler include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c new file mode 100644 index 000000000000..39fd362415cf --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -0,0 +1,491 @@ +/* + * Test the powerpc alignment handler on POWER8/POWER9 + * + * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This selftest exercises the powerpc alignment fault handler. + * + * We create two sets of source and destination buffers, one in regular memory, + * the other cache-inhibited (we use /dev/fb0 for this). + * + * We initialise the source buffers, then use whichever set of load/store + * instructions is under test to copy bytes from the source buffers to the + * destination buffers. For the regular buffers, these instructions will + * execute normally. For the cache-inhibited buffers, these instructions + * will trap and cause an alignment fault, and the alignment fault handler + * will emulate the particular instruction under test. We then compare the + * destination buffers to ensure that the native and emulated cases give the + * same result. + * + * TODO: + * - Any FIXMEs below + * - Test VSX regs < 32 and > 32 + * - Test all loads and stores + * - Check update forms do update register + * - Test alignment faults over page boundary + * + * Some old binutils may not support all the instructions. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +int bufsize; +int debug; +int testing; +volatile int gotsig; + +void sighandler(int sig, siginfo_t *info, void *ctx) +{ + struct ucontext *ucp = ctx; + + if (!testing) { + signal(sig, SIG_DFL); + kill(0, sig); + } + gotsig = sig; +#ifdef __powerpc64__ + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; +#else + ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; +#endif +} + +#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;" +#define DFORM(reg, n) " " #reg " ,0(%"#n") ;" + +#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + #ld_op form(ld_reg, 0) \ + #st_op form(st_reg, 1) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + +#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) +#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) +#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) +#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32) +#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32) +#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0) +#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32) +#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0) + +#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31) +#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31) +#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31) +#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31) + +#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0) +#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0) +#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) +#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) + + +/* FIXME: Unimplemented tests: */ +// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ +// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */ + +// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */ +// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ + + +/* preload byte by byte */ +void preload_data(void *dst, int offset, int width) +{ + char *c = dst; + int i; + + c += offset; + + for (i = 0 ; i < width ; i++) + c[i] = i; +} + +int test_memcpy(void *dst, void *src, int size, int offset, + void (*test_func)(char *, char *)) +{ + char *s, *d; + + s = src; + s += offset; + d = dst; + d += offset; + + assert(size == 16); + gotsig = 0; + testing = 1; + + test_func(s, d); /* run the actual test */ + + testing = 0; + if (gotsig) { + if (debug) + printf(" Got signal %i\n", gotsig); + return 1; + } + return 0; +} + +void dumpdata(char *s1, char *s2, int n, char *test_name) +{ + int i; + + printf(" %s: unexpected result:\n", test_name); + printf(" mem:"); + for (i = 0; i < n; i++) + printf(" %02x", s1[i]); + printf("\n"); + printf(" ci: "); + for (i = 0; i < n; i++) + printf(" %02x", s2[i]); + printf("\n"); +} + +int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name) +{ + char *s1c, *s2c; + + s1c = s1; + s1c += offset; + s2c = s2; + s2c += offset; + + if (memcmp(s1c, s2c, n)) { + if (debug) { + printf("\n Compare failed. Offset:%i length:%i\n", + offset, n); + dumpdata(s1c, s2c, n, test_name); + } + return 1; + } + return 0; +} + +/* + * Do two memcpy tests using the same instructions. One cachable + * memory and the other doesn't. + */ +int do_test(char *test_name, void (*test_func)(char *, char *)) +{ + int offset, width, fd, rc = 0, r; + void *mem0, *mem1, *ci0, *ci1; + + printf("\tDoing %s:\t", test_name); + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) { + printf("\n"); + perror("Can't open /dev/fb0"); + SKIP_IF(1); + } + + ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, 0x0); + ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { + printf("\n"); + perror("mmap failed"); + SKIP_IF(1); + } + + rc = posix_memalign(&mem0, bufsize, bufsize); + if (rc) { + printf("\n"); + return rc; + } + + rc = posix_memalign(&mem1, bufsize, bufsize); + if (rc) { + printf("\n"); + free(mem0); + return rc; + } + + /* offset = 0 no alignment fault, so skip */ + for (offset = 1; offset < 16; offset++) { + width = 16; /* vsx == 16 bytes */ + r = 0; + + /* load pattern into memory byte by byte */ + preload_data(ci0, offset, width); + preload_data(mem0, offset, width); // FIXME: remove?? + memcpy(ci0, mem0, bufsize); + memcpy(ci1, mem1, bufsize); /* initialise output to the same */ + + /* sanity check */ + test_memcmp(mem0, ci0, width, offset, test_name); + + r |= test_memcpy(ci1, ci0, width, offset, test_func); + r |= test_memcpy(mem1, mem0, width, offset, test_func); + if (r && !debug) { + printf("FAILED: Got signal"); + break; + } + + r |= test_memcmp(mem1, ci1, width, offset, test_name); + rc |= r; + if (r && !debug) { + printf("FAILED: Wrong Data"); + break; + } + } + if (!r) + printf("PASSED"); + printf("\n"); + + munmap(ci0, bufsize); + munmap(ci1, bufsize); + free(mem0); + free(mem1); + + return rc; +} + +int test_alignment_handler_vsx_206(void) +{ + int rc = 0; + + printf("VSX: 2.06B\n"); + LOAD_VSX_XFORM_TEST(lxvd2x); + LOAD_VSX_XFORM_TEST(lxvw4x); + LOAD_VSX_XFORM_TEST(lxsdx); + LOAD_VSX_XFORM_TEST(lxvdsx); + STORE_VSX_XFORM_TEST(stxvd2x); + STORE_VSX_XFORM_TEST(stxvw4x); + STORE_VSX_XFORM_TEST(stxsdx); + return rc; +} + +int test_alignment_handler_vsx_207(void) +{ + int rc = 0; + + printf("VSX: 2.07B\n"); + LOAD_VSX_XFORM_TEST(lxsspx); + LOAD_VSX_XFORM_TEST(lxsiwax); + LOAD_VSX_XFORM_TEST(lxsiwzx); + STORE_VSX_XFORM_TEST(stxsspx); + STORE_VSX_XFORM_TEST(stxsiwx); + return rc; +} + +int test_alignment_handler_vsx_300(void) +{ + int rc = 0; + + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + printf("VSX: 3.00B\n"); + LOAD_VMX_DFORM_TEST(lxsd); + LOAD_VSX_XFORM_TEST(lxsibzx); + LOAD_VSX_XFORM_TEST(lxsihzx); + LOAD_VMX_DFORM_TEST(lxssp); + LOAD_VSX_DFORM_TEST(lxv); + LOAD_VSX_XFORM_TEST(lxvb16x); + LOAD_VSX_XFORM_TEST(lxvh8x); + LOAD_VSX_XFORM_TEST(lxvx); + LOAD_VSX_XFORM_TEST(lxvwsx); + LOAD_VSX_XFORM_TEST(lxvl); + LOAD_VSX_XFORM_TEST(lxvll); + STORE_VMX_DFORM_TEST(stxsd); + STORE_VSX_XFORM_TEST(stxsibx); + STORE_VSX_XFORM_TEST(stxsihx); + STORE_VMX_DFORM_TEST(stxssp); + STORE_VSX_DFORM_TEST(stxv); + STORE_VSX_XFORM_TEST(stxvb16x); + STORE_VSX_XFORM_TEST(stxvh8x); + STORE_VSX_XFORM_TEST(stxvx); + STORE_VSX_XFORM_TEST(stxvl); + STORE_VSX_XFORM_TEST(stxvll); + return rc; +} + +int test_alignment_handler_integer(void) +{ + int rc = 0; + + printf("Integer\n"); + LOAD_DFORM_TEST(lbz); + LOAD_DFORM_TEST(lbzu); + LOAD_XFORM_TEST(lbzx); + LOAD_XFORM_TEST(lbzux); + LOAD_DFORM_TEST(lhz); + LOAD_DFORM_TEST(lhzu); + LOAD_XFORM_TEST(lhzx); + LOAD_XFORM_TEST(lhzux); + LOAD_DFORM_TEST(lha); + LOAD_DFORM_TEST(lhau); + LOAD_XFORM_TEST(lhax); + LOAD_XFORM_TEST(lhaux); + LOAD_XFORM_TEST(lhbrx); + LOAD_DFORM_TEST(lwz); + LOAD_DFORM_TEST(lwzu); + LOAD_XFORM_TEST(lwzx); + LOAD_XFORM_TEST(lwzux); + LOAD_DFORM_TEST(lwa); + LOAD_XFORM_TEST(lwax); + LOAD_XFORM_TEST(lwaux); + LOAD_XFORM_TEST(lwbrx); + LOAD_DFORM_TEST(ld); + LOAD_DFORM_TEST(ldu); + LOAD_XFORM_TEST(ldx); + LOAD_XFORM_TEST(ldux); + LOAD_XFORM_TEST(ldbrx); + LOAD_DFORM_TEST(lmw); + STORE_DFORM_TEST(stb); + STORE_XFORM_TEST(stbx); + STORE_DFORM_TEST(stbu); + STORE_XFORM_TEST(stbux); + STORE_DFORM_TEST(sth); + STORE_XFORM_TEST(sthx); + STORE_DFORM_TEST(sthu); + STORE_XFORM_TEST(sthux); + STORE_XFORM_TEST(sthbrx); + STORE_DFORM_TEST(stw); + STORE_XFORM_TEST(stwx); + STORE_DFORM_TEST(stwu); + STORE_XFORM_TEST(stwux); + STORE_XFORM_TEST(stwbrx); + STORE_DFORM_TEST(std); + STORE_XFORM_TEST(stdx); + STORE_DFORM_TEST(stdu); + STORE_XFORM_TEST(stdux); + STORE_XFORM_TEST(stdbrx); + STORE_DFORM_TEST(stmw); + return rc; +} + +int test_alignment_handler_vmx(void) +{ + int rc = 0; + + printf("VMX\n"); + LOAD_VMX_XFORM_TEST(lvx); + + /* + * FIXME: These loads only load part of the register, so our + * testing method doesn't work. Also they don't take alignment + * faults, so it's kinda pointless anyway + * + LOAD_VMX_XFORM_TEST(lvebx) + LOAD_VMX_XFORM_TEST(lvehx) + LOAD_VMX_XFORM_TEST(lvewx) + LOAD_VMX_XFORM_TEST(lvxl) + */ + STORE_VMX_XFORM_TEST(stvx); + STORE_VMX_XFORM_TEST(stvebx); + STORE_VMX_XFORM_TEST(stvehx); + STORE_VMX_XFORM_TEST(stvewx); + STORE_VMX_XFORM_TEST(stvxl); + return rc; +} + +int test_alignment_handler_fp(void) +{ + int rc = 0; + + printf("Floating point\n"); + LOAD_FLOAT_DFORM_TEST(lfd); + LOAD_FLOAT_XFORM_TEST(lfdx); + LOAD_FLOAT_DFORM_TEST(lfdp); + LOAD_FLOAT_XFORM_TEST(lfdpx); + LOAD_FLOAT_DFORM_TEST(lfdu); + LOAD_FLOAT_XFORM_TEST(lfdux); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_XFORM_TEST(lfsx); + LOAD_FLOAT_DFORM_TEST(lfsu); + LOAD_FLOAT_XFORM_TEST(lfsux); + LOAD_FLOAT_XFORM_TEST(lfiwzx); + LOAD_FLOAT_XFORM_TEST(lfiwax); + STORE_FLOAT_DFORM_TEST(stfd); + STORE_FLOAT_XFORM_TEST(stfdx); + STORE_FLOAT_DFORM_TEST(stfdp); + STORE_FLOAT_XFORM_TEST(stfdpx); + STORE_FLOAT_DFORM_TEST(stfdu); + STORE_FLOAT_XFORM_TEST(stfdux); + STORE_FLOAT_DFORM_TEST(stfs); + STORE_FLOAT_XFORM_TEST(stfsx); + STORE_FLOAT_DFORM_TEST(stfsu); + STORE_FLOAT_XFORM_TEST(stfsux); + STORE_FLOAT_XFORM_TEST(stfiwx); + + return rc; +} + +void usage(char *prog) +{ + printf("Usage: %s [options]\n", prog); + printf(" -d Enable debug error output\n"); + printf("\n"); + printf("This test requires a POWER8 or POWER9 CPU and a usable "); + printf("framebuffer at /dev/fb0.\n"); +} + +int main(int argc, char *argv[]) +{ + + struct sigaction sa; + int rc = 0; + int option = 0; + + while ((option = getopt(argc, argv, "d")) != -1) { + switch (option) { + case 'd': + debug++; + break; + default: + usage(argv[0]); + exit(1); + } + } + + bufsize = getpagesize(); + + sa.sa_sigaction = sighandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL) == -1 + || sigaction(SIGBUS, &sa, NULL) == -1 + || sigaction(SIGILL, &sa, NULL) == -1) { + perror("sigaction"); + exit(1); + } + + rc |= test_harness(test_alignment_handler_vsx_206, + "test_alignment_handler_vsx_206"); + rc |= test_harness(test_alignment_handler_vsx_207, + "test_alignment_handler_vsx_207"); + rc |= test_harness(test_alignment_handler_vsx_300, + "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_integer, + "test_alignment_handler_integer"); + rc |= test_harness(test_alignment_handler_vmx, + "test_alignment_handler_vmx"); + rc |= test_harness(test_alignment_handler_fp, + "test_alignment_handler_fp"); + return rc; +} -- cgit v1.2.3 From a08082f8e4e1c292df174a9ed303cfbff2fbe2cb Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Sun, 31 Dec 2017 18:20:46 -0500 Subject: powerpc/selftests: Check endianness on trap in TM Add a selftest to check if endianness is flipped inadvertently to BE (MSR.LE set to zero) on BE and LE machines when a trap is caught in transactional mode and load_fp and load_vec are zero, i.e. when MSR.FP and MSR.VEC are zeroed (disabled). Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 3 +- tools/testing/selftests/powerpc/tm/tm-trap.c | 329 ++++++++++++++++++++++++++ 3 files changed, 332 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-trap.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 241a4a4ee0e4..bb90d4b79524 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -13,3 +13,4 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-vmx-unavail tm-unavailable +tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 8ed6f8c57230..a23453943ad2 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \ + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) include ../../lib.mk @@ -18,6 +18,7 @@ $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.o $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx +$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c new file mode 100644 index 000000000000..5d92c23ee6cb --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -0,0 +1,329 @@ +/* + * Copyright 2017, Gustavo Romero, IBM Corp. + * Licensed under GPLv2. + * + * Check if thread endianness is flipped inadvertently to BE on trap + * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after + * load_fp and load_vec overflowed). + * + * The issue can be checked on LE machines simply by zeroing load_fp + * and load_vec and then causing a trap in TM. Since the endianness + * changes to BE on return from the signal handler, 'nop' is + * thread as an illegal instruction in following sequence: + * tbegin. + * beq 1f + * trap + * tend. + * 1: nop + * + * However, although the issue is also present on BE machines, it's a + * bit trickier to check it on BE machines because MSR.LE bit is set + * to zero which determines a BE endianness that is the native + * endianness on BE machines, so nothing notably critical happens, + * i.e. no illegal instruction is observed immediately after returning + * from the signal handler (as it happens on LE machines). Thus to test + * it on BE machines LE endianness is forced after a first trap and then + * the endianness is verified on subsequent traps to determine if the + * endianness "flipped back" to the native endianness (BE). + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" +#include "utils.h" + +#define pr_error(error_code, format, ...) \ + error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__) + +#define MSR_LE 1UL +#define LE 1UL + +pthread_t t0_ping; +pthread_t t1_pong; + +int exit_from_pong; + +int trap_event; +int le; + +bool success; + +void trap_signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + uint64_t thread_endianness; + + /* Get thread endianness: extract bit LE from MSR */ + thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; + + /*** + * Little-Endian Machine + */ + + if (le) { + /* First trap event */ + if (trap_event == 0) { + /* Do nothing. Since it is returning from this trap + * event that endianness is flipped by the bug, so just + * let the process return from the signal handler and + * check on the second trap event if endianness is + * flipped or not. + */ + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Since trap was caught in TM on first trap event, if + * endianness was still LE (not flipped inadvertently) + * after returning from the signal handler instruction + * (1) is executed (basically a 'nop'), as it's located + * at address of tbegin. +4 (rollback addr). As (1) on + * LE endianness does in effect nothing, instruction (2) + * is then executed again as 'trap', generating a second + * trap event (note that in that case 'trap' is caught + * not in transacional mode). On te other hand, if after + * the return from the signal handler the endianness in- + * advertently flipped, instruction (1) is tread as a + * branch instruction, i.e. b .+8, hence instruction (3) + * and (4) are executed (tbegin.; trap;) and we get sim- + * ilaly on the trap signal handler, but now in TM mode. + * Either way, it's now possible to check the MSR LE bit + * once in the trap handler to verify if endianness was + * flipped or not after the return from the second trap + * event. If endianness is flipped, the bug is present. + * Finally, getting a trap in TM mode or not is just + * worth noting because it affects the math to determine + * the offset added to the NIP on return: the NIP for a + * trap caught in TM is the rollback address, i.e. the + * next instruction after 'tbegin.', whilst the NIP for + * a trap caught in non-transactional mode is the very + * same address of the 'trap' instruction that generated + * the trap event. + */ + + if (thread_endianness == LE) { + /* Go to 'success', i.e. instruction (6) */ + ucp->uc_mcontext.gp_regs[PT_NIP] += 16; + } else { + /* + * Thread endianness is BE, so it flipped + * inadvertently. Thus we flip back to LE and + * set NIP to go to 'failure', instruction (5). + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } + } + } + + /*** + * Big-Endian Machine + */ + + else { + /* First trap event */ + if (trap_event == 0) { + /* + * Force thread endianness to be LE. Instructions (1), + * (3), and (4) will be executed, generating a second + * trap in TM mode. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Do nothing. If bug is present on return from this + * second trap event endianness will flip back "automat- + * ically" to BE, otherwise thread endianness will + * continue to be LE, just as it was set above. + */ + } + /* A third trap event */ + else { + /* + * Once here it means that after returning from the sec- + * ond trap event instruction (4) (trap) was executed + * as LE, generating a third trap event. In that case + * endianness is still LE as set on return from the + * first trap event, hence no bug. Otherwise, bug + * flipped back to BE on return from the second trap + * event and instruction (4) was executed as 'tdi' (so + * basically a 'nop') and branch to 'failure' in + * instruction (5) was taken to indicate failure and we + * never get here. + */ + + /* + * Flip back to BE and go to instruction (6), i.e. go to + * 'success'. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 8; + } + } + + trap_event++; +} + +void usr1_signal_handler(int signo, siginfo_t *si, void *not_used) +{ + /* Got a USR1 signal from ping(), so just tell pong() to exit */ + exit_from_pong = 1; +} + +void *ping(void *not_used) +{ + uint64_t i; + + trap_event = 0; + + /* + * Wait an amount of context switches so load_fp and load_vec overflows + * and MSR_[FP|VEC|V] is 0. + */ + for (i = 0; i < 1024*1024*512; i++) + ; + + asm goto( + /* + * [NA] means "Native Endianness", i.e. it tells how a + * instruction is executed on machine's native endianness (in + * other words, native endianness matches kernel endianness). + * [OP] means "Opposite Endianness", i.e. on a BE machine, it + * tells how a instruction is executed as a LE instruction; con- + * versely, on a LE machine, it tells how a instruction is + * executed as a BE instruction. When [NA] is omitted, it means + * that the native interpretation of a given instruction is not + * relevant for the test. Likewise when [OP] is omitted. + */ + + " tbegin. ;" /* (0) tbegin. [NA] */ + " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */ + " trap ;" /* (2) trap [NA] */ + ".long 0x1D05007C;" /* (3) tbegin. [OP] */ + ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */ + " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */ + " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/ + + : : : : failure, success); + +failure: + success = false; + goto exit_from_ping; + +success: + success = true; + +exit_from_ping: + /* Tell pong() to exit before leaving */ + pthread_kill(t1_pong, SIGUSR1); + return NULL; +} + +void *pong(void *not_used) +{ + while (!exit_from_pong) + /* + * Induce context switches on ping() thread + * until ping() finishes its job and signs + * to exit from this loop. + */ + sched_yield(); + + return NULL; +} + +int tm_trap_test(void) +{ + uint16_t k = 1; + + int rc; + + pthread_attr_t attr; + cpu_set_t cpuset; + + struct sigaction trap_sa; + + trap_sa.sa_flags = SA_SIGINFO; + trap_sa.sa_sigaction = trap_signal_handler; + sigaction(SIGTRAP, &trap_sa, NULL); + + struct sigaction usr1_sa; + + usr1_sa.sa_flags = SA_SIGINFO; + usr1_sa.sa_sigaction = usr1_signal_handler; + sigaction(SIGUSR1, &usr1_sa, NULL); + + /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + /* Init pthread attribute */ + rc = pthread_attr_init(&attr); + if (rc) + pr_error(rc, "pthread_attr_init()"); + + /* + * Bind thread ping() and pong() both to CPU 0 so they ping-pong and + * speed up context switches on ping() thread, speeding up the load_fp + * and load_vec overflow. + */ + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_error(rc, "pthread_attr_setaffinity()"); + + /* Figure out the machine endianness */ + le = (int) *(uint8_t *)&k; + + printf("%s machine detected. Checking if endianness flips %s", + le ? "Little-Endian" : "Big-Endian", + "inadvertently on trap in TM... "); + + rc = fflush(0); + if (rc) + pr_error(rc, "fflush()"); + + /* Launch ping() */ + rc = pthread_create(&t0_ping, &attr, ping, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + exit_from_pong = 0; + + /* Launch pong() */ + rc = pthread_create(&t1_pong, &attr, pong, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + rc = pthread_join(t0_ping, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + rc = pthread_join(t1_pong, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + if (success) { + printf("no.\n"); /* no, endianness did not flip inadvertently */ + return EXIT_SUCCESS; + } + + printf("yes!\n"); /* yes, endianness did flip inadvertently */ + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + return test_harness(tm_trap_test, "tm_trap_test"); +} -- cgit v1.2.3 From ef54cf0c600fb8f5737fb001a9e357edda1a1de8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:07:30 -0700 Subject: usbip: prevent bind loops on devices attached to vhci_hcd usbip host binds to devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Unbind followed by bind works, however device is left in a bad state with accesses via the attached busid result in errors and system hangs during shutdown. Fix it to check and bail out if the device is already attached to vhci_hcd. Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_bind.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68b..e121cfb1746a 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); -- cgit v1.2.3 From ef824501f50846589f02173d73ce3fe6021a9d2a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:08:03 -0700 Subject: usbip: list: don't list devices attached to vhci_hcd usbip host lists devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Fix it to check and not list devices that are attached to vhci_hcd. Cc: stable@vger.kernel.org Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_list.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7..d65a9f444174 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); -- cgit v1.2.3 From cc08d1b91d1e0679d015bf7a99aedb28e2c89e12 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 18 Jan 2018 22:26:25 +0000 Subject: selftests/x86: Add {,_32,_64} targets One can only use `make all` or `make _` as make targets. `make ` doesn't work as Ingo noticed: x86> make test_vsyscall gcc -O2 -g -std=gnu99 -pthread -Wall -no-pie test_vsyscall.c -o test_vsyscall /tmp/aBaoo3nb.o: In function `init_vdso': test_vsyscall.c:68: undefined reference to `dlopen' test_vsyscall.c:76: undefined reference to `dlsym' test_vsyscall.c:80: undefined reference to `dlsym' test_vsyscall.c:84: undefined reference to `dlsym' test_vsyscall.c:88: undefined reference to `dlsym' test_vsyscall.c:70: undefined reference to `dlopen' collect2: error: ld returned 1 exit status : recipe for target 'test_vsyscall' failed make: *** [test_vsyscall] Error 1 Makefile target substitution neither works :-/ Generate .PHONY targets per-test and fix target substitution. Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: x86@kernel.org Reported-by: Ingo Molnar Signed-off-by: Dmitry Safonov Reviewed-by: Andy Lutomirski Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 939a337128db..e7324fbc76b1 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -27,14 +27,26 @@ UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) +define gen-target-rule-32 +$(1) $(1)_32: $(OUTPUT)/$(1)_32 +.PHONY: $(1) $(1)_32 +endef + +define gen-target-rule-64 +$(1) $(1)_64: $(OUTPUT)/$(1)_64 +.PHONY: $(1) $(1)_64 +endef + ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t)))) endif all_32: $(BINARIES_32) -- cgit v1.2.3 From 0e9e327d1622c332e5b1db272d5c97af82164f68 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 18 Jan 2018 17:11:08 -0700 Subject: selftests: vm: update .gitignore with missing generated file Add missing generated file to .gitignore. Signed-off-by: Shuah Khan --- tools/testing/selftests/vm/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 1ca2ee4d15b9..63c94d776e89 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -10,3 +10,4 @@ userfaultfd mlock-intersect-test mlock-random-test virtual_address_range +gup_benchmark -- cgit v1.2.3 From 35136920e100b85b15b2cfd1505453ba5b6c757f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 22 Jan 2018 22:10:59 -0800 Subject: tools/bpf: fix a test failure in selftests prog test_verifier Commit 111e6b45315c ("selftests/bpf: make test_verifier run most programs") enables tools/testing/selftests/bpf/test_verifier unit cases to run via bpf_prog_test_run command. With the latest code base, test_verifier had one test case failure: ... #473/p check deducing bounds from const, 2 FAIL retval 1 != 0 0: (b7) r0 = 1 1: (75) if r0 s>= 0x1 goto pc+1 R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 2: (95) exit from 1 to 3: R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 3: (d5) if r0 s<= 0x1 goto pc+1 R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 4: (95) exit from 3 to 5: R0=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 5: (1f) r1 -= r0 6: (95) exit processed 7 insns (limit 131072), stack depth 0 ... The test case does not set return value in the test structure and hence the return value from the prog run is assumed to be 0. However, the actual return value is 1. As a result, the test failed. The fix is to correctly set the return value in the test structure. Fixes: 111e6b45315c ("selftests/bpf: make test_verifier run most programs") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index fb82d29ee863..9e7075b268be 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8766,6 +8766,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check deducing bounds from const, 3", -- cgit v1.2.3 From 1a97cf1fe50340c5e758d7a74419d8f6e8b49ace Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 22 Jan 2018 17:46:57 -0800 Subject: selftests/bpf: speedup test_maps test_hashmap_walk takes very long time on debug kernel with kasan on. Reduce the number of iterations in this test without sacrificing test coverage. Also add printfs as progress indicator. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 040356ecc862..f0d2f09898a3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data) static void test_hashmap_walk(int task, void *data) { - int fd, i, max_entries = 100000; + int fd, i, max_entries = 1000; long long key, value, next_key; bool next_key_valid = true; @@ -931,8 +931,12 @@ static void test_map_large(void) close(fd); } -static void run_parallel(int tasks, void (*fn)(int task, void *data), - void *data) +#define run_parallel(N, FN, DATA) \ + printf("Fork %d tasks to '" #FN "'\n", N); \ + __run_parallel(N, FN, DATA) + +static void __run_parallel(int tasks, void (*fn)(int task, void *data), + void *data) { pid_t pid[tasks]; int i; @@ -972,7 +976,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void do_work(int fn, void *data) +static void test_update_delete(int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; @@ -1012,7 +1016,7 @@ static void test_map_parallel(void) */ data[0] = fd; data[1] = DO_UPDATE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && @@ -1035,7 +1039,7 @@ static void test_map_parallel(void) /* Now let's delete all elemenets in parallel. */ data[1] = DO_DELETE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Nothing should be left. */ key = -1; -- cgit v1.2.3 From 8e6875250a1189e0d8db8b05e18abe63c2744521 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 22 Jan 2018 20:48:40 -0800 Subject: selftests/bpf: fix test_dev_cgroup The test incorrectly doing mkdir /mnt/cgroup-test-work-dirtest-bpf-based-device-cgroup instead of mkdir /mnt/cgroup-test-work-dir/test-bpf-based-device-cgroup somehow such mkdir succeeds and new directory appears: /mnt/cgroup-test-work-dir/cgroup-test-work-dirtest-bpf-based-device-cgroup Later cleanup via nftw("/mnt/cgroup-test-work-dir", ...); doesn't walk this directory. "rmdir /mnt/cgroup-test-work-dir" succeeds, but bpf program and dangling cgroup stays in memory. That's a separate issue on a cgroup side. For now fix the test. Fixes: 37f1ba0909df ("selftests/bpf: add a test for device cgroup controller") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_dev_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index c1535b34f14f..3489cc283433 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -21,7 +21,7 @@ #define DEV_CGROUP_PROG "./dev_cgroup.o" -#define TEST_CGROUP "test-bpf-based-device-cgroup/" +#define TEST_CGROUP "/test-bpf-based-device-cgroup/" int main(int argc, char **argv) { -- cgit v1.2.3 From 783687810e986a15ffbf86c516a1a48ff37f38f7 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Tue, 23 Jan 2018 13:30:44 +0900 Subject: bpf: test_maps: cleanup sockmaps when test ends Bug: BPF programs and maps related to sockmaps test exist in memory even after test_maps ends. This patch fixes it as a short term workaround (sockmap kernel side needs real fixing) by empyting sockmaps when test ends. Fixes: 6f6d33f3b3d0f ("bpf: selftests add sockmap tests") Signed-off-by: Prashant Bhole [ daniel: Note on workaround. ] Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index f0d2f09898a3..436c4c72414f 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -463,7 +463,7 @@ static void test_devmap(int task, void *data) #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" static void test_sockmap(int tasks, void *data) { - int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; + int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; int err, i, fd, udp, sfd[6] = {0xdeadbeef}; @@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } - /* Test map close sockets */ - for (i = 0; i < 6; i++) + /* Test map close sockets and empty maps */ + for (i = 0; i < 6; i++) { + bpf_map_delete_elem(map_fd_tx, &i); + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); close(map_fd_rx); bpf_object__close(obj); @@ -881,8 +884,13 @@ out: printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno)); exit(1); out_sockmap: - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { + if (map_fd_tx) + bpf_map_delete_elem(map_fd_tx, &i); + if (map_fd_rx) + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); exit(1); } -- cgit v1.2.3 From caf952288d715df32b2e70c64506849287844fe4 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jan 2018 11:22:53 -0800 Subject: selftests/bpf: add checks on extack messages for eBPF hw offload tests Add checks to test that netlink extack messages are correctly displayed in some expected error cases for eBPF offload to netdevsim with TC and XDP. iproute2 may be built without libmnl support, in which case the extack messages will not be reported. Try to detect this condition, and when enountered print a mild warning to the user and skip the extack validation. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 112 +++++++++++++++++++++------- 1 file changed, 86 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 833b9c1ec450..8dca4dc6c193 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -26,6 +26,7 @@ import time logfile = None log_level = 1 +skip_extack = False bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) pp = pprint.PrettyPrinter() devs = [] # devices we created for clean up @@ -132,7 +133,7 @@ def rm(f): if f in files: files.remove(f) -def tool(name, args, flags, JSON=True, ns="", fail=True): +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): params = "" if JSON: params += "%s " % (flags["json"]) @@ -140,9 +141,20 @@ def tool(name, args, flags, JSON=True, ns="", fail=True): if ns != "": ns = "ip netns exec %s " % (ns) - ret, out = cmd(ns + name + " " + params + args, fail=fail) - if JSON and len(out.strip()) != 0: - return ret, json.loads(out) + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr else: return ret, out @@ -181,13 +193,15 @@ def bpftool_map_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) -def ip(args, force=False, JSON=True, ns="", fail=True): +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): if force: args = "-force " + args - return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail) + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) -def tc(args, JSON=True, ns="", fail=True): - return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) def ethtool(dev, opt, args, fail=True): return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) @@ -348,13 +362,17 @@ class NetdevSim: return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) - def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True): + def set_xdp(self, bpf, mode, force=False, JSON=True, + fail=True, include_stderr=False): return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), - force=force, JSON=JSON, fail=fail) + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) - def unset_xdp(self, mode, force=False, JSON=True, fail=True): + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), - force=force, JSON=JSON, fail=fail) + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) def ip_link_show(self, xdp): _, link = ip("link show dev %s" % (self['ifname'])) @@ -410,7 +428,7 @@ class NetdevSim: return filters def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False, - fail=True): + fail=True, include_stderr=False): params = "" if da: params += " da" @@ -419,7 +437,8 @@ class NetdevSim: if skip_hw: params += " skip_hw" return tc("filter add dev %s ingress bpf %s %s" % - (self['ifname'], bpf, params), fail=fail) + (self['ifname'], bpf, params), + fail=fail, include_stderr=include_stderr) def set_ethtool_tc_offloads(self, enable, fail=True): args = "hw-tc-offload %s" % ("on" if enable else "off") @@ -491,6 +510,16 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): fail("dev" not in m.keys(), "Device parameters not reported") fail(dev != m["dev"], "Map's device different than program's") +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "Error: netdevsim: " + reference, args) + # Parse command line parser = argparse.ArgumentParser() parser.add_argument("--log", help="output verbose log to given file") @@ -527,6 +556,14 @@ for s in samples: skip(ret != 0, "sample %s/%s not found, please compile it" % (bpf_test_dir, s)) +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + # Check if net namespaces seem to work ns = mknetns() skip(ns is None, "Could not create a net namespace") @@ -558,8 +595,10 @@ try: sim.tc_flush_filters() start_test("Test TC offloads are off by default...") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "Error: TC offload is disabled on net device.", args) sim.wait_for_flush() sim.set_ethtool_tc_offloads(True) @@ -587,8 +626,11 @@ try: sim.dfs["bpf_tc_non_bound_accept"] = "N" start_test("Test TC cBPF unbound bytecode doesn't offload...") - ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) sim.wait_for_flush() start_test("Test TC offloads work...") @@ -669,16 +711,24 @@ try: "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") - ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) + ret, _, err = sim.set_xdp(obj, "offload", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") - ret, _ = sim.set_xdp(obj, "", force=True, fail=False) + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.set_xdp(obj, "", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) start_test("Test XDP prog remove with bad flags...") - ret, _ = sim.unset_xdp("offload", force=True, fail=False) + ret, _, err = sim.unset_xdp("offload", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode mode") - ret, _ = sim.unset_xdp("", force=True, fail=False) + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) @@ -687,8 +737,9 @@ try: sim.unset_xdp("drv") bpftool_prog_list_wait(expected=0) sim.set_mtu(9000) - ret, _ = sim.set_xdp(obj, "drv", fail=False) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) sim.set_mtu(1500) sim.wait_for_flush() @@ -724,25 +775,32 @@ try: sim2.set_xdp(obj, "offload") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") - ret, _ = sim.set_xdp(pinned, "offload", fail=False) + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack_nsim(err, "program bound to different dev.", args) sim2.remove() - ret, _ = sim.set_xdp(pinned, "offload", fail=False) + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) rm(pin_file) bpftool_prog_list_wait(expected=0) start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() sim.set_xdp(obj, "offload") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) sim.unset_xdp("offload") sim.wait_for_flush() sim.cls_bpf_add_filter(obj, skip_sw=True) - ret, _ = sim.set_xdp(obj, "offload", fail=False) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) start_test("Test binding TC from pinned...") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") @@ -765,8 +823,10 @@ try: start_test("Test asking for TC offload of two filters...") sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) - ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) sim.tc_flush_filters(bound=2, total=2) -- cgit v1.2.3 From 9045bdc8ed4e4e2c713d8d38bda9aa506b4bd8c5 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jan 2018 11:22:55 -0800 Subject: selftests/bpf: check bpf verifier log buffer usage works for HW offload Make netdevsim print a message to the BPF verifier log buffer when a program is offloaded. Then use this message in hardware offload selftests to make sure that using this buffer actually prints the message to the console for eBPF hardware offload. The message is appended after the last instruction is processed with the verifying function from netdevsim. Output looks like the following: $ tc filter add dev foo ingress bpf obj sample_ret0.o \ sec .text verbose skip_sw Prog section '.text' loaded (5)! - Type: 3 - Instructions: 2 (0 over limit) - License: Verifier analysis: 0: (b7) r0 = 0 1: (95) exit [netdevsim] Hello from netdevsim! processed 2 insns, stack depth 0 "verbose" flag is required to see it in the console since netdevsim does not throw an error after printing the message. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 8dca4dc6c193..d128a16ee9a8 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -362,8 +362,10 @@ class NetdevSim: return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) - def set_xdp(self, bpf, mode, force=False, JSON=True, + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, fail=True, include_stderr=False): + if verbose: + bpf += " verbose" return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), force=force, JSON=JSON, fail=fail, include_stderr=include_stderr) @@ -427,11 +429,13 @@ class NetdevSim: (len(filters), expected)) return filters - def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False, - fail=True, include_stderr=False): + def cls_bpf_add_filter(self, bpf, da=False, verbose=False, skip_sw=False, + skip_hw=False, fail=True, include_stderr=False): params = "" if da: params += " da" + if verbose: + params += " verbose" if skip_sw: params += " skip_sw" if skip_hw: @@ -520,6 +524,13 @@ def check_extack(output, reference, args): def check_extack_nsim(output, reference, args): check_extack(output, "Error: netdevsim: " + reference, args) +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") + # Parse command line parser = argparse.ArgumentParser() parser.add_argument("--log", help="output verbose log to given file") @@ -634,8 +645,10 @@ try: sim.wait_for_flush() start_test("Test TC offloads work...") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) fail(ret != 0, "TC filter did not load with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") dfs = sim.dfs_get_bound_progs(expected=1) @@ -744,12 +757,13 @@ try: sim.wait_for_flush() start_test("Test XDP offload...") - sim.set_xdp(obj, "offload") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) ipl = sim.ip_link_show(xdp=True) link_xdp = ipl["xdp"]["prog"] progs = bpftool_prog_list(expected=1) prog = progs[0] fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") dfs = sim.dfs_get_bound_progs(expected=1) -- cgit v1.2.3 From 6d2d58f1b7ec379eb9467a5bc010ba49295d7714 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Jan 2018 11:22:56 -0800 Subject: selftests/bpf: validate replace of TC filters is working Daniel discovered recently I broke TC filter replace (and fixed it in commit ad9294dbc227 ("bpf: fix cls_bpf on filter replace")). Add a test to make sure it never happens again. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 55 ++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index d128a16ee9a8..ae3eea3ab820 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -429,8 +429,26 @@ class NetdevSim: (len(filters), expected)) return filters - def cls_bpf_add_filter(self, bpf, da=False, verbose=False, skip_sw=False, - skip_hw=False, fail=True, include_stderr=False): + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + params = "" if da: params += " da" @@ -440,9 +458,10 @@ class NetdevSim: params += " skip_sw" if skip_hw: params += " skip_hw" - return tc("filter add dev %s ingress bpf %s %s" % - (self['ifname'], bpf, params), - fail=fail, include_stderr=include_stderr) + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + params=params, + fail=fail, include_stderr=include_stderr) def set_ethtool_tc_offloads(self, enable, fail=True): args = "hw-tc-offload %s" % ("on" if enable else "off") @@ -644,6 +663,32 @@ try: args) sim.wait_for_flush() + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.tc_flush_filters() + start_test("Test TC offloads work...") ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, fail=False, include_stderr=True) -- cgit v1.2.3 From 31e95b61e172144bb2b626a291db1bdc0769275b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 23 Jan 2018 20:05:51 -0800 Subject: selftests/bpf: make 'dubious pointer arithmetic' test useful mostly revert the previous workaround and make 'dubious pointer arithmetic' test useful again. Use (ptr - ptr) << const instead of ptr << const to generate large scalar. The rest stays as before commit 2b36047e7889. Fixes: 2b36047e7889 ("selftests/bpf: fix test_align") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_align.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index e19b410125eb..ff8bd7e3e50c 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = { .insns = { PREP_PKT_POINTERS, BPF_MOV64_IMM(BPF_REG_0, 0), - /* ptr & const => unknown & const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40), - /* ptr << const => unknown << const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + /* (ptr - ptr) << 2 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), /* We have a (4n) value. Let's make a packet offset * out of it. First add 14, to make it a (4n+2) @@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, - /* R5 bitwise operator &= on pointer prohibited */ + {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + /* (ptr - ptr) << 2 == unknown, (4n) */ + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ + {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* packet pointer + nonnegative (4n+2) */ + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the + * upper half of the address space. + * So we did not get a 'range' on R6, and the access + * attempt will fail. + */ + {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, } }, { -- cgit v1.2.3 From d6d4f60c3a0933852dcc40a2142d93027ea1da76 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:16 -0800 Subject: bpf: add selftest for tcpbpf Added a selftest for tcpbpf (sock_ops) that checks that the appropriate callbacks occured and that it can access tcp_sock fields and that their values are correct. Run with command: ./test_tcpbpf_user Adding the flag "-d" will show why it did not pass. Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 86 ++++++++++++++++- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bpf_helpers.h | 2 + tools/testing/selftests/bpf/tcp_client.py | 51 ++++++++++ tools/testing/selftests/bpf/tcp_server.py | 83 ++++++++++++++++ tools/testing/selftests/bpf/test_tcpbpf.h | 16 ++++ tools/testing/selftests/bpf/test_tcpbpf_kern.c | 118 +++++++++++++++++++++++ tools/testing/selftests/bpf/test_tcpbpf_user.c | 126 +++++++++++++++++++++++++ 8 files changed, 480 insertions(+), 6 deletions(-) create mode 100755 tools/testing/selftests/bpf/tcp_client.py create mode 100755 tools/testing/selftests/bpf/tcp_server.py create mode 100644 tools/testing/selftests/bpf/test_tcpbpf.h create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_kern.c create mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index af1f49ad8b88..db6bdc375126 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -642,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -748,7 +756,8 @@ union bpf_attr { FN(perf_event_read_value), \ FN(perf_prog_read_value), \ FN(getsockopt), \ - FN(override_return), + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -952,8 +961,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -968,8 +978,39 @@ struct bpf_sock_ops { */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -1003,6 +1044,43 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3a44b655d852..98688352208b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -14,13 +14,13 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i LDLIBS += -lcap -lelf -lrt TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup + test_align test_verifier_log test_dev_cgroup test_tcpbpf_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - sample_map_ret0.o + sample_map_ret0.o test_tcpbpf_kern.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 33cb00e46c49..dde2c11d7771 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_getsockopt; +static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = + (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py new file mode 100755 index 000000000000..481dccdf140c --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +serverPort = int(sys.argv[1]) +HostName = socket.gethostname() + +# create active socket +sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +try: + sock.connect((HostName, serverPort)) +except socket.error as e: + sys.exit(1) + +buf = '' +n = 0 +while n < 1000: + buf += '+' + n += 1 + +sock.settimeout(1); +n = send(sock, buf) +n = read(sock, 500) +sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py new file mode 100755 index 000000000000..bc454d7d0be2 --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +SERVER_PORT = 12877 +MAX_PORTS = 2 + +serverPort = SERVER_PORT +serverSocket = None + +HostName = socket.gethostname() + +# create passive socket +serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +host = socket.gethostname() + +try: serverSocket.bind((host, 0)) +except socket.error as msg: + print 'bind fails: ', msg + +sn = serverSocket.getsockname() +serverPort = sn[1] + +cmdStr = ("./tcp_client.py %d &") % (serverPort) +os.system(cmdStr) + +buf = '' +n = 0 +while n < 500: + buf += '.' + n += 1 + +serverSocket.listen(MAX_PORTS) +readList = [serverSocket] + +while True: + readyRead, readyWrite, inError = \ + select.select(readList, [], [], 2) + + if len(readyRead) > 0: + waitCount = 0 + for sock in readyRead: + if sock == serverSocket: + (clientSocket, address) = serverSocket.accept() + address = str(address[0]) + readList.append(clientSocket) + else: + sock.settimeout(1); + s = read(sock, 1000) + n = send(sock, buf) + sock.close() + serverSocket.close() + sys.exit(0) + else: + print 'Select timeout!' + sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h new file mode 100644 index 000000000000..2fe43289943c --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpbpf_globals { + __u32 event_map; + __u32 total_retrans; + __u32 data_segs_in; + __u32 data_segs_out; + __u32 bad_cb_test_rv; + __u32 good_cb_test_rv; + __u64 bytes_received; + __u64 bytes_acked; +}; +#endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c new file mode 100644 index 000000000000..66bf71541903 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpbpf.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 2, +}; + +static inline void update_event_map(int event) +{ + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (gp == NULL) { + struct tcpbpf_globals g = {0}; + + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } else { + g = *gp; + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } +} + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int bad_call_rv = 0; + int good_call_rv = 0; + int op; + int v = 0; + + op = (int) skops->op; + + update_event_map(op); + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Test failure to set largest cb flag (assumes not defined) */ + bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + /* Set callback */ + good_call_rv = bpf_sock_ops_cb_flags_set(skops, + BPF_SOCK_OPS_STATE_CB_FLAG); + /* Update results */ + { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.bad_cb_test_rv = bad_call_rv; + g.good_cb_test_rv = good_call_rv; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set callback */ +// good_call_rv = bpf_sock_ops_cb_flags_set(skops, +// BPF_SOCK_OPS_STATE_CB_FLAG); + skops->sk_txhash = 0x12345f; + v = 0xff; + rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, + sizeof(v)); + break; + case BPF_SOCK_OPS_RTO_CB: + break; + case BPF_SOCK_OPS_RETRANS_CB: + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE) { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c new file mode 100644 index 000000000000..95a370f3d378 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_util.h" +#include +#include "test_tcpbpf.h" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +#define SYSTEM(CMD) \ + do { \ + if (system(CMD)) { \ + printf("system(%s) FAILS!\n", CMD); \ + } \ + } while (0) + +int main(int argc, char **argv) +{ + const char *file = "test_tcpbpf_kern.o"; + struct tcpbpf_globals g = {0}; + int cg_fd, prog_fd, map_fd; + bool debug_flag = false; + int error = EXIT_FAILURE; + struct bpf_object *obj; + char cmd[100], *dir; + struct stat buffer; + __u32 key = 0; + int pid; + int rv; + + if (argc > 1 && strcmp(argv[1], "-d") == 0) + debug_flag = true; + + dir = "/tmp/cgroupv2/foo"; + + if (stat(dir, &buffer) != 0) { + SYSTEM("mkdir -p /tmp/cgroupv2"); + SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); + SYSTEM("mkdir -p /tmp/cgroupv2/foo"); + } + pid = (int) getpid(); + sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); + SYSTEM(cmd); + + cg_fd = open(dir, O_DIRECTORY, O_RDONLY); + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + SYSTEM("./tcp_server.py"); + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + if (g.bytes_received != 501 || g.bytes_acked != 1002 || + g.data_segs_in != 1 || g.data_segs_out != 1 || + (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || + g.good_cb_test_rv != 0) { + printf("FAILED: Wrong stats\n"); + if (debug_flag) { + printf("\n"); + printf("bytes_received: %d (expecting 501)\n", + (int)g.bytes_received); + printf("bytes_acked: %d (expecting 1002)\n", + (int)g.bytes_acked); + printf("data_segs_in: %d (expecting 1)\n", + g.data_segs_in); + printf("data_segs_out: %d (expecting 1)\n", + g.data_segs_out); + printf("event_map: 0x%x (at least 0x47e)\n", + g.event_map); + printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", + g.bad_cb_test_rv); + printf("good_cb_test_rv:0x%x (expecting 0)\n", + g.good_cb_test_rv); + } + goto err; + } + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + return error; + +} -- cgit v1.2.3 From 2fb89a38d35bb6c935edb819d9096de455ce87cf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Jan 2018 14:00:52 -0800 Subject: selftests/bpf: check for spurious extacks from the driver Drivers should not report errors when offload is not forced. Check stdout and stderr for familiar messages when with no skip flags and with skip_hw. Check for add, replace, and destroy. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index ae3eea3ab820..49f5ceeabfa6 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -543,6 +543,10 @@ def check_extack(output, reference, args): def check_extack_nsim(output, reference, args): check_extack(output, "Error: netdevsim: " + reference, args) +def check_no_extack(res, needle): + fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), + "Found '%s' in command output, leaky extack?" % (needle)) + def check_verifier_log(output, reference): lines = output.split("\n") for l in reversed(lines): @@ -550,6 +554,18 @@ def check_verifier_log(output, reference): return fail(True, "Missing or incorrect message from netdevsim in verifier log") +def test_spurios_extack(sim, obj, skip_hw, needle): + res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, + include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_hw=skip_hw, include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf", + include_stderr=True) + check_no_extack(res, needle) + + # Parse command line parser = argparse.ArgumentParser() parser.add_argument("--log", help="output verbose log to given file") @@ -687,6 +703,17 @@ try: (j)) sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + start_test("Test spurious extack from the driver...") + test_spurios_extack(sim, obj, False, "netdevsim") + test_spurios_extack(sim, obj, True, "netdevsim") + + sim.set_ethtool_tc_offloads(False) + + test_spurios_extack(sim, obj, False, "TC offload is disabled") + test_spurios_extack(sim, obj, True, "TC offload is disabled") + + sim.set_ethtool_tc_offloads(True) + sim.tc_flush_filters() start_test("Test TC offloads work...") -- cgit v1.2.3 From baf6a07e040d8308165654c7f49ee9ee18cd89be Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Jan 2018 14:00:53 -0800 Subject: selftests/bpf: check for chain-non-0 extack message Make sure netdevsim doesn't allow offload of chains other than 0, and that it reports the expected extack message. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 49f5ceeabfa6..e78aad0a68bb 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -430,13 +430,15 @@ class NetdevSim: return filters def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, - cls="", params="", + chain=None, cls="", params="", fail=True, include_stderr=False): spec = "" if prio is not None: spec += " prio %d" % (prio) if handle: spec += " handle %s" % (handle) + if chain is not None: + spec += " chain %d" % (chain) return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, @@ -444,7 +446,7 @@ class NetdevSim: fail=fail, include_stderr=include_stderr) def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, - da=False, verbose=False, + chain=None, da=False, verbose=False, skip_sw=False, skip_hw=False, fail=True, include_stderr=False): cls = "bpf " + bpf @@ -460,7 +462,7 @@ class NetdevSim: params += " skip_hw" return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, - params=params, + chain=chain, params=params, fail=fail, include_stderr=include_stderr) def set_ethtool_tc_offloads(self, enable, fail=True): @@ -679,6 +681,14 @@ try: args) sim.wait_for_flush() + start_test("Test non-0 chain offload...") + ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1, + skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Offloaded a filter to chain other than 0") + check_extack(err, "Error: Driver supports only offload of chain 0.", args) + sim.tc_flush_filters() + start_test("Test TC replace...") sim.cls_bpf_add_filter(obj, prio=1, handle=1) sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) -- cgit v1.2.3 From 771fc607e6b97be8f0cc7dfaa61173009c2214d4 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 26 Jan 2018 12:06:07 -0800 Subject: bpf: clean up from test_tcpbpf_kern.c Removed commented lines from test_tcpbpf_kern.c Fixes: d6d4f60c3a09 bpf: add selftest for tcpbpf Signed-off-by: Lawrence Brakmo Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf_kern.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 66bf71541903..57119ad57a3f 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -79,9 +79,6 @@ int bpf_testcb(struct bpf_sock_ops *skops) } break; case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: - /* Set callback */ -// good_call_rv = bpf_sock_ops_cb_flags_set(skops, -// BPF_SOCK_OPS_STATE_CB_FLAG); skops->sk_txhash = 0x12345f; v = 0xff; rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, -- cgit v1.2.3 From 21ccaf21497b72f42133182716a42dbf573d314b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Jan 2018 23:33:48 +0100 Subject: bpf: add further test cases around div/mod and others Update selftests to relfect recent changes and add various new test cases. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 343 ++++++++++++++++++++++++++-- 1 file changed, 330 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 9e7075b268be..697bd83de295 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -111,7 +112,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "DIV32 by 0, zero check 2", @@ -123,7 +124,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "DIV64 by 0, zero check", @@ -135,7 +136,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD32 by 0, zero check 1", @@ -147,7 +148,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD32 by 0, zero check 2", @@ -159,7 +160,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD64 by 0, zero check", @@ -171,13 +172,245 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, + }, + { + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, + }, + /* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ + { + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, .retval = 0, }, + { + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, { "empty prog", .insns = { }, - .errstr = "last insn is not an exit or jmp", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -374,7 +607,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode c4", }, { "arsh32 on reg", @@ -385,7 +618,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode cc", }, { "arsh64 on imm", @@ -501,7 +734,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_CALL uses reserved", + .errstr = "unknown opcode 8d", .result = REJECT, }, { @@ -691,7 +924,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(0, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -709,7 +942,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -718,7 +951,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, -1, -1, -1, -1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -7543,7 +7776,7 @@ static struct bpf_test tests[] = { }, BPF_EXIT_INSN(), }, - .errstr = "BPF_END uses reserved fields", + .errstr = "unknown opcode d7", .result = REJECT, }, { @@ -8963,6 +9196,90 @@ static struct bpf_test tests[] = { .result = ACCEPT, .retval = 1, }, + { + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, { "calls: overlapping caller/callee", .insns = { -- cgit v1.2.3 From af32efeede9e188fefe0af51d117c31cf281de65 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 26 Jan 2018 15:06:08 -0800 Subject: tools/bpf: add a multithreaded stress test in bpf selftests test_lpm_map The new test will spawn four threads, doing map update, delete, lookup and get_next_key in parallel. It is able to reproduce the issue in the previous commit found by syzbot and Eric Dumazet. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_lpm_map.c | 95 ++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 98688352208b..bf05bc5e36e5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf -lrt +LDLIBS += -lcap -lelf -lrt -lpthread TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 081510853c6d..2be87e9ee28d 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void) close(map_fd); } +#define MAX_TEST_KEYS 4 +struct lpm_mt_test_info { + int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */ + int iter; + int map_fd; + struct { + __u32 prefixlen; + __u32 data; + } key[MAX_TEST_KEYS]; +}; + +static void *lpm_test_command(void *arg) +{ + int i, j, ret, iter, key_size; + struct lpm_mt_test_info *info = arg; + struct bpf_lpm_trie_key *key_p; + + key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_p = alloca(key_size); + for (iter = 0; iter < info->iter; iter++) + for (i = 0; i < MAX_TEST_KEYS; i++) { + /* first half of iterations in forward order, + * and second half in backward order. + */ + j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1; + key_p->prefixlen = info->key[j].prefixlen; + memcpy(key_p->data, &info->key[j].data, sizeof(__u32)); + if (info->cmd == 0) { + __u32 value = j; + /* update must succeed */ + assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0); + } else if (info->cmd == 1) { + ret = bpf_map_delete_elem(info->map_fd, key_p); + assert(ret == 0 || errno == ENOENT); + } else if (info->cmd == 2) { + __u32 value; + ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); + assert(ret == 0 || errno == ENOENT); + } else { + struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); + assert(ret == 0 || errno == ENOENT || errno == ENOMEM); + } + } + + // Pass successful exit info back to the main thread + pthread_exit((void *)info); +} + +static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) +{ + info->iter = 2000; + info->map_fd = map_fd; + info->key[0].prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", &info->key[0].data); + info->key[1].prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", &info->key[1].data); + info->key[2].prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", &info->key[2].data); + info->key[3].prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", &info->key[3].data); +} + +static void test_lpm_multi_thread(void) +{ + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; + int i, map_fd; + void *ret; + + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, + 100, BPF_F_NO_PREALLOC); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); + for (i = 0; i < 4; i++) { + if (i != 0) + memcpy(&info[i], &info[0], sizeof(info[i])); + info[i].cmd = i; + assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0); + } + + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -667,6 +760,8 @@ int main(void) test_lpm_get_next_key(); + test_lpm_multi_thread(); + printf("test_lpm: OK\n"); return 0; } -- cgit v1.2.3 From 03aadf9499ba71ebc6059317fc409012c95ddcdb Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:58 +0300 Subject: NTB: ntb_test: Safely use paths with whitespace If some of variables like LOC/REM or LOCAL_*/REMOTE_* got whitespaces, the script may fail with syntax error. Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") Signed-off-by: Serge Semin Acked-by: Logan Gunthorpe Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 5fc7ad359e21..a8647ad891eb 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -87,7 +87,7 @@ set -e function _modprobe() { - modprobe "$@" + modprobe "$@" if [[ "$REMOTE_HOST" != "" ]]; then ssh "$REMOTE_HOST" modprobe "$@" @@ -274,13 +274,13 @@ function pingpong_test() echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)" - LOC_START=$(read_file $LOC/count) - REM_START=$(read_file $REM/count) + LOC_START=$(read_file "$LOC/count") + REM_START=$(read_file "$REM/count") sleep 7 - LOC_END=$(read_file $LOC/count) - REM_END=$(read_file $REM/count) + LOC_END=$(read_file "$LOC/count") + REM_END=$(read_file "$REM/count") if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then echo "Ping pong counter not incrementing!" >&2 @@ -304,15 +304,15 @@ function perf_test() max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA echo "Running local perf test $WITH DMA" - write_file "" $LOCAL_PERF/run + write_file "" "$LOCAL_PERF/run" echo -n " " - read_file $LOCAL_PERF/run + read_file "$LOCAL_PERF/run" echo " Passed" echo "Running remote perf test $WITH DMA" - write_file "" $REMOTE_PERF/run + write_file "" "$REMOTE_PERF/run" echo -n " " - read_file $REMOTE_PERF/run + read_file "$REMOTE_PERF/run" echo " Passed" _modprobe -r ntb_perf @@ -320,39 +320,39 @@ function perf_test() function ntb_tool_tests() { - LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV - REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV + LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV" + REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV" echo "Starting ntb_tool tests..." _modprobe ntb_tool - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + write_file "Y" "$LOCAL_TOOL/link_event" + write_file "Y" "$REMOTE_TOOL/link_event" - link_test $LOCAL_TOOL $REMOTE_TOOL - link_test $REMOTE_TOOL $LOCAL_TOOL + link_test "$LOCAL_TOOL" "$REMOTE_TOOL" + link_test "$REMOTE_TOOL" "$LOCAL_TOOL" #Ensure the link is up on both sides before continuing - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + write_file "Y" "$LOCAL_TOOL/link_event" + write_file "Y" "$REMOTE_TOOL/link_event" - for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do + for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do PT=$(basename $PEER_TRANS) - write_file $MW_SIZE $LOCAL_TOOL/$PT - write_file $MW_SIZE $REMOTE_TOOL/$PT + write_file $MW_SIZE "$LOCAL_TOOL/$PT" + write_file $MW_SIZE "$REMOTE_TOOL/$PT" done - doorbell_test $LOCAL_TOOL $REMOTE_TOOL - doorbell_test $REMOTE_TOOL $LOCAL_TOOL - scratchpad_test $LOCAL_TOOL $REMOTE_TOOL - scratchpad_test $REMOTE_TOOL $LOCAL_TOOL + doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" + doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" + scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL" + scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL" - for MW in $(ls $LOCAL_TOOL/mw*); do + for MW in $(ls "$LOCAL_TOOL"/mw*); do MW=$(basename $MW) - mw_test $MW $LOCAL_TOOL $REMOTE_TOOL - mw_test $MW $REMOTE_TOOL $LOCAL_TOOL + mw_test $MW "$LOCAL_TOOL" "$REMOTE_TOOL" + mw_test $MW "$REMOTE_TOOL" "$LOCAL_TOOL" done _modprobe -r ntb_tool @@ -360,8 +360,8 @@ function ntb_tool_tests() function ntb_pingpong_tests() { - LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV - REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV + LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV" + REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV" echo "Starting ntb_pingpong tests..." @@ -374,8 +374,8 @@ function ntb_pingpong_tests() function ntb_perf_tests() { - LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV - REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV + LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV" + REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV" echo "Starting ntb_perf tests..." -- cgit v1.2.3 From dd151d7200ae6c5cfddbec7eb1a55ba8319e12b2 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:59 +0300 Subject: NTB: ntb_test: Add ntb_tool port tests Multi-port interface is now available in ntb_tool driver. According to the new NTB API, there might be more than two devices connected over NTB. It means each device can have multiple freely enumerated ports. Each port got index assigned by NTB hardware driver. This test is performed to determine the local and peer ports as well as their indexes. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index a8647ad891eb..541ba70ad640 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -127,6 +127,56 @@ function write_file() fi } +function check_file() +{ + split_remote $1 + + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" "[[ -e ${VPATH} ]]" + else + [[ -e ${VPATH} ]] + fi +} + +function find_pidx() +{ + PORT=$1 + PPATH=$2 + + for ((i = 0; i < 64; i++)); do + PEER_DIR="$PPATH/peer$i" + + check_file ${PEER_DIR} || break + + PEER_PORT=$(read_file "${PEER_DIR}/port") + if [[ ${PORT} -eq $PEER_PORT ]]; then + echo $i + return 0 + fi + done + + return 1 +} + +function port_test() +{ + LOC=$1 + REM=$2 + + echo "Running port tests on: $(basename $LOC) / $(basename $REM)" + + LOCAL_PORT=$(read_file "$LOC/port") + REMOTE_PORT=$(read_file "$REM/port") + + LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC") + REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM") + + echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host" + echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host" + + echo " Passed" +} + function link_test() { LOC=$1 @@ -327,6 +377,8 @@ function ntb_tool_tests() _modprobe ntb_tool + port_test "$LOCAL_TOOL" "$REMOTE_TOOL" + write_file "Y" "$LOCAL_TOOL/link_event" write_file "Y" "$REMOTE_TOOL/link_event" -- cgit v1.2.3 From 0298e5b9d0107a2c9d6e5ddf52ab90b405c3802d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:00 +0300 Subject: NTB: ntb_test: Update ntb_tool link tests Link Up and Down methods are used to change NTB link settings on local side only for multi-port devices. Link is considered up only if both sides local and peer set it up. Intel/AMD hardware acts a bit different by assigning the Primary and Secondary roles, so Primary device only is able to change the link state. Such behaviour should be reflected in the test code. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 541ba70ad640..a3942f31c057 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -138,6 +138,11 @@ function check_file() fi } +function subdirname() +{ + echo $(basename $(dirname $1)) 2> /dev/null +} + function find_pidx() { PORT=$1 @@ -183,9 +188,9 @@ function link_test() REM=$2 EXP=0 - echo "Running link tests on: $(basename $LOC) / $(basename $REM)" + echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)" - if ! write_file "N" "$LOC/link" 2> /dev/null; then + if ! write_file "N" "$LOC/../link" 2> /dev/null; then echo " Unsupported" return fi @@ -193,12 +198,11 @@ function link_test() write_file "N" "$LOC/link_event" if [[ $(read_file "$REM/link") != "N" ]]; then - echo "Expected remote link to be down in $REM/link" >&2 + echo "Expected link to be down in $REM/link" >&2 exit -1 fi - write_file "Y" "$LOC/link" - write_file "Y" "$LOC/link_event" + write_file "Y" "$LOC/../link" echo " Passed" } @@ -379,15 +383,15 @@ function ntb_tool_tests() port_test "$LOCAL_TOOL" "$REMOTE_TOOL" - write_file "Y" "$LOCAL_TOOL/link_event" - write_file "Y" "$REMOTE_TOOL/link_event" + LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX" + REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX" - link_test "$LOCAL_TOOL" "$REMOTE_TOOL" - link_test "$REMOTE_TOOL" "$LOCAL_TOOL" + link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" #Ensure the link is up on both sides before continuing - write_file "Y" "$LOCAL_TOOL/link_event" - write_file "Y" "$REMOTE_TOOL/link_event" + write_file "Y" "$LOCAL_PEER_TOOL/link_event" + write_file "Y" "$REMOTE_PEER_TOOL/link_event" for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do PT=$(basename $PEER_TRANS) -- cgit v1.2.3 From 4e1ef427f273504fd99556f6f60a1218563ab772 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:01 +0300 Subject: NTB: ntb_test: Update ntb_tool DB tests DB interface of ntb_tool driver hasn't been changed much, but db_valid_mask DebugFS file has still been added. In this case it's much better to test all valid DB bits instead of using the predefined mask, which may be incorrect in general. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index a3942f31c057..ec2e51183f8d 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -18,7 +18,6 @@ LIST_DEVS=FALSE DEBUGFS=${DEBUGFS-/sys/kernel/debug} -DB_BITMASK=0x7FFF PERF_RUN_ORDER=32 MAX_MW_SIZE=0 RUN_DMA_TESTS= @@ -39,7 +38,6 @@ function show_help() echo "be highly recommended." echo echo "Options:" - echo " -b BITMASK doorbell clear bitmask for ntb_tool" echo " -C don't cleanup ntb modules on exit" echo " -d run dma tests" echo " -h show this help message" @@ -56,7 +54,6 @@ function parse_args() OPTIND=0 while getopts "b:Cdhlm:r:p:w:" opt; do case "$opt" in - b) DB_BITMASK=${OPTARG} ;; C) DONT_CLEANUP=1 ;; d) RUN_DMA_TESTS=1 ;; h) show_help; exit 0 ;; @@ -215,21 +212,30 @@ function doorbell_test() echo "Running db tests on: $(basename $LOC) / $(basename $REM)" - write_file "c $DB_BITMASK" "$REM/db" + DB_VALID_MASK=$(read_file "$LOC/db_valid_mask") - for ((i=1; i <= 8; i++)); do - let DB=$(read_file "$REM/db") || true - if [[ "$DB" != "$EXP" ]]; then + write_file "c $DB_VALID_MASK" "$REM/db" + + for ((i = 0; i < 64; i++)); do + DB=$(read_file "$REM/db") + if [[ "$DB" -ne "$EXP" ]]; then echo "Doorbell doesn't match expected value $EXP " \ "in $REM/db" >&2 exit -1 fi - let "MASK=1 << ($i-1)" || true - let "EXP=$EXP | $MASK" || true + let "MASK = (1 << $i) & $DB_VALID_MASK" || true + let "EXP = $EXP | $MASK" || true + write_file "s $MASK" "$LOC/peer_db" done + write_file "c $DB_VALID_MASK" "$REM/db_mask" + write_file $DB_VALID_MASK "$REM/db_event" + write_file "s $DB_VALID_MASK" "$REM/db_mask" + + write_file "c $DB_VALID_MASK" "$REM/db" + echo " Passed" } @@ -393,14 +399,15 @@ function ntb_tool_tests() write_file "Y" "$LOCAL_PEER_TOOL/link_event" write_file "Y" "$REMOTE_PEER_TOOL/link_event" + doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" + doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" + for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE "$LOCAL_TOOL/$PT" write_file $MW_SIZE "$REMOTE_TOOL/$PT" done - doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" - doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL" scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL" -- cgit v1.2.3 From 06bd0407d06c4a8437401952caed4bc6bbba9e44 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:02 +0300 Subject: NTB: ntb_test: Update ntb_tool Scratchpad tests Scratchpad NTB API has changed so has the ntb_tool driver. Outbound Scratchpad DebugFS files have been moved to peer specific directories. Each scratchpad is now available via separate file. The test code has been accordingly altered. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 43 ++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index ec2e51183f8d..06b9b156a6fc 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -239,35 +239,44 @@ function doorbell_test() echo " Passed" } -function read_spad() +function get_files_count() { - VPATH=$1 - IDX=$2 + NAME=$1 + LOC=$2 + + split_remote $LOC - ROW=($(read_file "$VPATH" | grep -e "^$IDX")) - let VAL=${ROW[1]} || true - echo $VAL + if [[ "$REMOTE" == "" ]]; then + echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l) + else + echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \ + wc -l" 2> /dev/null) + fi } function scratchpad_test() { LOC=$1 REM=$2 - CNT=$(read_file "$LOC/spad" | wc -l) - echo "Running spad tests on: $(basename $LOC) / $(basename $REM)" + echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "spad" "$LOC") + + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi for ((i = 0; i < $CNT; i++)); do VAL=$RANDOM - write_file "$i $VAL" "$LOC/peer_spad" - RVAL=$(read_spad "$REM/spad" $i) + write_file "$VAL" "$LOC/spad$i" + RVAL=$(read_file "$REM/../spad$i") - if [[ "$VAL" != "$RVAL" ]]; then - echo "Scratchpad doesn't match expected value $VAL " \ - "in $REM/spad, got $RVAL" >&2 + if [[ "$VAL" -ne "$RVAL" ]]; then + echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2 exit -1 fi - done echo " Passed" @@ -402,15 +411,15 @@ function ntb_tool_tests() doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" + scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" + for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE "$LOCAL_TOOL/$PT" write_file $MW_SIZE "$REMOTE_TOOL/$PT" done - scratchpad_test "$LOCAL_TOOL" "$REMOTE_TOOL" - scratchpad_test "$REMOTE_TOOL" "$LOCAL_TOOL" - for MW in $(ls "$LOCAL_TOOL"/mw*); do MW=$(basename $MW) -- cgit v1.2.3 From e770112b2dfa38a9481309f0333d84e122c23e2d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:03 +0300 Subject: NTB: ntb_test: Add ntb_tool Message tests Messages NTB API is now available. ntb_tool driver has been altered to perform messages send and receive operation. The test of messages read/write to/from peer device has been added to the script. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 06b9b156a6fc..849191fd2308 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -282,6 +282,40 @@ function scratchpad_test() echo " Passed" } +function message_test() +{ + LOC=$1 + REM=$2 + + echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "msg" "$LOC") + + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi + + MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits") + MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits") + + write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts" + write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts" + + for ((i = 0; i < $CNT; i++)); do + VAL=$RANDOM + write_file "$VAL" "$LOC/msg$i" + RVAL=$(read_file "$REM/../msg$i") + + if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then + echo "Message $i value $RVAL doesn't match $VAL" >&2 + exit -1 + fi + done + + echo " Passed" +} + function write_mw() { split_remote $2 @@ -414,6 +448,9 @@ function ntb_tool_tests() scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" + message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" + for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE "$LOCAL_TOOL/$PT" -- cgit v1.2.3 From fa63be5428ef8756ec583aed0408ddd96109ff28 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:04 +0300 Subject: NTB: ntb_test: Update ntb_tool MW tests There are devices (like IDT PCIe switches), which outbound MWs xlat address is setup on peer side. In this case local side is supposed to allocate a memory buffer and somehow deliver the xlat DMA address to peer so one could set the outbound MW up. The MW test is altered so to support both previous Intel/AMD and new IDT-like devices. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 89 +++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 849191fd2308..2dea126c85eb 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -43,7 +43,9 @@ function show_help() echo " -h show this help message" echo " -l list available local and remote PCI ids" echo " -r REMOTE_HOST specify the remote's hostname to connect" - echo " to for the test (using ssh)" + echo " to for the test (using ssh)" + echo " -m MW_SIZE memory window size for ntb_tool" + echo " (default: $MW_SIZE)" echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)" echo " -w max_mw_size maxmium memory window size" echo @@ -316,6 +318,32 @@ function message_test() echo " Passed" } +function get_number() +{ + KEY=$1 + + sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p" +} + +function mw_alloc() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file $MW_SIZE "$LOC/mw_trans$IDX" + + INB_MW=$(read_file "$LOC/mw_trans$IDX") + MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size") + MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address") + + write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX" + + if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then + echo "MW $IDX size aligned to $MW_ALIGNED_SIZE" + fi +} + function write_mw() { split_remote $2 @@ -328,17 +356,15 @@ function write_mw() fi } -function mw_test() +function mw_check() { IDX=$1 LOC=$2 REM=$3 - echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)" - - write_mw "$LOC/$IDX" + write_mw "$LOC/mw$IDX" - split_remote "$LOC/$IDX" + split_remote "$LOC/mw$IDX" if [[ "$REMOTE" == "" ]]; then A=$VPATH else @@ -346,7 +372,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$A" fi - split_remote "$REM/peer_$IDX" + split_remote "$REM/peer_mw$IDX" if [[ "$REMOTE" == "" ]]; then B=$VPATH else @@ -354,7 +380,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$B" fi - cmp -n $MW_SIZE "$A" "$B" + cmp -n $MW_ALIGNED_SIZE "$A" "$B" if [[ $? != 0 ]]; then echo "Memory window $MW did not match!" >&2 fi @@ -366,8 +392,39 @@ function mw_test() if [[ "$B" == "/tmp/*" ]]; then rm "$B" fi +} + +function mw_free() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX" + + write_file 0 "$LOC/mw_trans$IDX" +} + +function mw_test() +{ + LOC=$1 + REM=$2 + + CNT=$(get_files_count "mw_trans" "$LOC") + + for ((i = 0; i < $CNT; i++)); do + echo "Running mw$i tests on: $(subdirname $LOC) / " \ + "$(subdirname $REM)" + + mw_alloc $i $LOC $REM + + mw_check $i $LOC $REM + + mw_free $i $LOC $REM + + echo " Passed" + done - echo " Passed" } function pingpong_test() @@ -451,18 +508,8 @@ function ntb_tool_tests() message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" - for PEER_TRANS in $(ls "$LOCAL_TOOL"/peer_trans*); do - PT=$(basename $PEER_TRANS) - write_file $MW_SIZE "$LOCAL_TOOL/$PT" - write_file $MW_SIZE "$REMOTE_TOOL/$PT" - done - - for MW in $(ls "$LOCAL_TOOL"/mw*); do - MW=$(basename $MW) - - mw_test $MW "$LOCAL_TOOL" "$REMOTE_TOOL" - mw_test $MW "$REMOTE_TOOL" "$LOCAL_TOOL" - done + mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" _modprobe -r ntb_tool } -- cgit v1.2.3 From 4517a5701c1c6a30fddb619560589f970f633843 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:05 +0300 Subject: NTB: ntb_test: Update ntb_perf tests ntb_perf driver has been also updated so to have the multi-port interface support. User now must specify what peer port is going to be used to perform the test. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- tools/testing/selftests/ntb/ntb_test.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 2dea126c85eb..08cbfbbc7029 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -39,15 +39,16 @@ function show_help() echo echo "Options:" echo " -C don't cleanup ntb modules on exit" - echo " -d run dma tests" echo " -h show this help message" echo " -l list available local and remote PCI ids" echo " -r REMOTE_HOST specify the remote's hostname to connect" echo " to for the test (using ssh)" echo " -m MW_SIZE memory window size for ntb_tool" echo " (default: $MW_SIZE)" - echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)" - echo " -w max_mw_size maxmium memory window size" + echo " -d run dma tests for ntb_perf" + echo " -p ORDER total data order for ntb_perf" + echo " (default: $PERF_RUN_ORDER)" + echo " -w MAX_MW_SIZE maxmium memory window size for ntb_perf" echo } @@ -460,17 +461,17 @@ function perf_test() WITH="without" fi - _modprobe ntb_perf run_order=$PERF_RUN_ORDER \ + _modprobe ntb_perf total_order=$PERF_RUN_ORDER \ max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA echo "Running local perf test $WITH DMA" - write_file "" "$LOCAL_PERF/run" + write_file "$LOCAL_PIDX" "$LOCAL_PERF/run" echo -n " " read_file "$LOCAL_PERF/run" echo " Passed" echo "Running remote perf test $WITH DMA" - write_file "" "$REMOTE_PERF/run" + write_file "$REMOTE_PIDX" "$REMOTE_PERF/run" echo -n " " read_file "$REMOTE_PERF/run" echo " Passed" -- cgit v1.2.3 From 30f1d370744cc35f26d78a1dd31aeb0e4be93c38 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:39 +0200 Subject: tools/virtio: switch to __ptr_ring_empty We don't rely on lockless guarantees, but it seems cleaner than inverting __ptr_ring_peek. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/ptr_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index e6e81305ef46..477899c12c51 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -187,7 +187,7 @@ bool enable_kick() bool avail_empty() { - return !__ptr_ring_peek(&array); + return __ptr_ring_empty(&array); } bool use_buf(unsigned *lenp, void **bufp) -- cgit v1.2.3 From 6dd42157830d875bdd3c6fefd05cbcf0875b51e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:40 +0200 Subject: tools/virtio: more stubs to fix tools build Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/linux/kernel.h | 2 +- tools/virtio/linux/thread_info.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/linux/thread_info.h (limited to 'tools') diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a7a8d8..fca8381bbe04 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000000..e0f610d08006 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) -- cgit v1.2.3 From b4eab7de6685ee2691a7e297d511a126dbf53207 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:42 +0200 Subject: tools/virtio: copy READ/WRITE_ONCE This is to make ptr_ring test build again. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/main.h | 57 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'tools') diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 5706e075adf2..593a3289c87d 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -134,4 +134,61 @@ static inline void busy_wait(void) barrier(); \ } while (0) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + #endif -- cgit v1.2.3 From 491847f3b29cef0417a03142b96e2a6dea81cca0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 26 Jan 2018 01:36:44 +0200 Subject: tools/virtio: fix smp_mb on x86 Offset 128 overlaps the last word of the redzone. Use 132 which is always beyond that. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/virtio/ringtest/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 593a3289c87d..301d59bfcd0a 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -111,7 +111,7 @@ static inline void busy_wait(void) } #if defined(__x86_64__) || defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized -- cgit v1.2.3 From a845c7cf4b4cb5e9e3b2823867892b27646f3a98 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:39 -0600 Subject: objtool: Improve retpoline alternative handling Currently objtool requires all retpolines to be: a) patched in with alternatives; and b) annotated with ANNOTATE_NOSPEC_ALTERNATIVE. If you forget to do both of the above, objtool segfaults trying to dereference a NULL 'insn->call_dest' pointer. Avoid that situation and print a more helpful error message: quirks.o: warning: objtool: efi_delete_dummy_variable()+0x99: unsupported intra-function call quirks.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE. Future improvements can be made to make objtool smarter with respect to retpolines, but this is a good incremental improvement for now. Reported-and-tested-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/819e50b6d9c2e1a22e34c1a636c0b2057cc8c6e5.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f40d46e24bcc..bc3490d929ff 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); - /* - * FIXME: Thanks to retpolines, it's now considered - * normal for a function to call within itself. So - * disable this warning for now. - */ -#if 0 - if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at offset 0x%lx", - insn->sec, insn->offset, dest_off); + + if (!insn->call_dest && !insn->ignore) { + WARN_FUNC("unsupported intra-function call", + insn->sec, insn->offset); + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); return -1; } -#endif + } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -648,6 +644,8 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; + insn->ignore = orig_insn->ignore_alts; + if (insn->type != INSN_JUMP_CONDITIONAL && insn->type != INSN_JUMP_UNCONDITIONAL) continue; @@ -729,10 +727,6 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } - /* Ignore retpoline alternatives. */ - if (orig_insn->ignore_alts) - continue; - new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -1089,11 +1083,11 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_call_destinations(file); + ret = add_special_section_alts(file); if (ret) return ret; - ret = add_special_section_alts(file); + ret = add_call_destinations(file); if (ret) return ret; @@ -1720,10 +1714,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, insn->visited = true; - list_for_each_entry(alt, &insn->alts, list) { - ret = validate_branch(file, alt->insn, state); - if (ret) - return 1; + if (!insn->ignore_alts) { + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } } switch (insn->type) { -- cgit v1.2.3 From 17bc33914bcc98ba3c6b426fd1c49587a25c0597 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:40 -0600 Subject: objtool: Add support for alternatives at the end of a section Now that the previous patch gave objtool the ability to read retpoline alternatives, it shows a new warning: arch/x86/entry/entry_64.o: warning: objtool: .entry_trampoline: don't know how to handle alternatives at end of section This is due to the JMP_NOSPEC in entry_SYSCALL_64_trampoline(). Previously, objtool ignored this situation because it wasn't needed, and it would have required a bit of extra code. Now that this case exists, add proper support for it. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2a30a3c2158af47d891a76e69bb1ef347e0443fd.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 53 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bc3490d929ff..9cd028aa1509 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -594,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; unsigned long dest_off; last_orig_insn = NULL; @@ -610,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file, last_orig_insn = insn; } - if (!next_insn_same_sec(file, last_orig_insn)) { - WARN("%s: don't know how to handle alternatives at end of section", - special_alt->orig_sec->name); - return -1; - } - - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { - WARN("malloc failed"); - return -1; + if (next_insn_same_sec(file, last_orig_insn)) { + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + clear_insn_state(&fake_jump->state); + + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + fake_jump->ignore = true; } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - clear_insn_state(&fake_jump->state); - - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = -1; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->ignore = true; if (!special_alt->new_len) { + if (!fake_jump) { + WARN("%s: empty alternative at end of section", + special_alt->orig_sec->name); + return -1; + } + *new_insn = fake_jump; return 0; } @@ -654,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = insn->offset + insn->len + insn->immediate; - if (dest_off == special_alt->new_off + special_alt->new_len) + if (dest_off == special_alt->new_off + special_alt->new_len) { + if (!fake_jump) { + WARN("%s: alternative jump to end of section", + special_alt->orig_sec->name); + return -1; + } insn->jump_dest = fake_jump; + } if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -670,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file, return -1; } - list_add(&fake_jump->list, &last_new_insn->list); + if (fake_jump) + list_add(&fake_jump->list, &last_new_insn->list); return 0; } -- cgit v1.2.3 From 830c1e3d16b2c1733cd1ec9c8f4d47a398ae31bc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:41 -0600 Subject: objtool: Warn on stripped section symbol With the following fix: 2a0098d70640 ("objtool: Fix seg fault with gold linker") ... a seg fault was avoided, but the original seg fault condition in objtool wasn't fixed. Replace the seg fault with an error message. Suggested-by: Ingo Molnar Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/dc4585a70d6b975c99fc51d1957ccdde7bd52f3a.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/orc_gen.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e61fe703197b..18384d9be4e1 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; + if (!insn_sec->sym) { + WARN("missing symbol for section %s", insn_sec->name); + return -1; + } + /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); -- cgit v1.2.3 From f229a55c31a7e12a15c7b4dcf9a97a9bf7a72ce8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 26 Oct 2017 04:48:01 +0300 Subject: virtio/ringtest: fix up need_event math last kicked event index must be updated unconditionally: even if we don't need to kick, we do not want to re-check the same entry for events. Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Acked-by: Jason Wang --- tools/virtio/ringtest/ring.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c index 747c5dd47be8..2b9859beea65 100644 --- a/tools/virtio/ringtest/ring.c +++ b/tools/virtio/ringtest/ring.c @@ -188,16 +188,18 @@ bool enable_call() void kick_available(void) { + bool need; + /* Flush in previous flags write */ /* Barrier C (for pairing) */ smp_mb(); - if (!need_event(event->kick_index, - guest.avail_idx, - guest.kicked_avail_idx)) - return; + need = need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx); guest.kicked_avail_idx = guest.avail_idx; - kick(); + if (need) + kick(); } /* host side */ @@ -253,14 +255,18 @@ bool use_buf(unsigned *lenp, void **bufp) void call_used(void) { + bool need; + /* Flush in previous flags write */ /* Barrier D (for pairing) */ smp_mb(); - if (!need_event(event->call_index, + + need = need_event(event->call_index, host.used_idx, - host.called_used_idx)) - return; + host.called_used_idx); host.called_used_idx = host.used_idx; - call(); + + if (need) + call(); } -- cgit v1.2.3 From a311650ae6ad0c169ade2583f78aa519878ea13f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 27 Oct 2017 16:11:13 +0300 Subject: virtio/ringtest: virtio_ring: fix up need_event math last kicked event index must be updated unconditionally: even if we don't need to kick, we do not want to re-check the same entry for events. Reported-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Reviewed-by: Jens Freimann --- tools/virtio/ringtest/virtio_ring_0_9.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c index bbc3043b2fb1..5fd3fbcb9e57 100644 --- a/tools/virtio/ringtest/virtio_ring_0_9.c +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -225,16 +225,18 @@ bool enable_call() void kick_available(void) { + bool need; + /* Flush in previous flags write */ /* Barrier C (for pairing) */ smp_mb(); - if (!vring_need_event(vring_avail_event(&ring), - guest.avail_idx, - guest.kicked_avail_idx)) - return; + need = vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx); guest.kicked_avail_idx = guest.avail_idx; - kick(); + if (need) + kick(); } /* host side */ @@ -316,14 +318,16 @@ bool use_buf(unsigned *lenp, void **bufp) void call_used(void) { + bool need; + /* Flush in previous flags write */ /* Barrier D (for pairing) */ smp_mb(); - if (!vring_need_event(vring_used_event(&ring), - host.used_idx, - host.called_used_idx)) - return; + need = vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx); host.called_used_idx = host.used_idx; - call(); + if (need) + call(); } -- cgit v1.2.3 From 235266b8e11c9db12497bdfc6d5e9100f3434c24 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 31 Jan 2018 16:16:59 -0800 Subject: selftests/vm: move 128TB mmap boundary test to generic directory Architectures like PPC64 support mmap hint address based large address space selection. This test can be run on those architectures too. Move the test from the x86 selftests to selftest/vm so that other architectures can use it too. We also add a few new test scenarios in this patch. We do test a few boundary conditions before we do a high address mmap. PPC64 uses the address limit to validate the address in the fault path. We had bugs in this area w.r.t SLB fault handling before we updated the addess limit. We also touch the allocated space to make sure we don't have any bugs in the fault handling path. [akpm@linux-foundation.org: restore tools/testing/selftests/vm/Makefile alpha ordering] Link: http://lkml.kernel.org/r/20171123165226.32582-1-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: "Kirill A . Shutemov" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 5 +- tools/testing/selftests/vm/run_vmtests | 11 ++ tools/testing/selftests/vm/va_128TBswitch.c | 297 ++++++++++++++++++++++++++++ tools/testing/selftests/x86/5lvl.c | 177 ----------------- 4 files changed, 311 insertions(+), 179 deletions(-) create mode 100644 tools/testing/selftests/vm/va_128TBswitch.c delete mode 100644 tools/testing/selftests/x86/5lvl.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7f45806bd863..fdefa2295ddc 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -8,17 +8,18 @@ endif CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test +TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range -TEST_GEN_FILES += gup_benchmark TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index cc826326de87..d2561895a021 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -177,4 +177,15 @@ else echo "[PASS]" fi +echo "-----------------------------" +echo "running virtual address 128TB switch test" +echo "-----------------------------" +./va_128TBswitch +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c new file mode 100644 index 000000000000..e7fe734c374f --- /dev/null +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -0,0 +1,297 @@ +/* + * + * Authors: Kirill A. Shutemov + * Authors: Aneesh Kumar K.V + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include +#include +#include + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#ifdef __powerpc64__ +#define PAGE_SIZE (64 << 10) +/* + * This will work with 16M and 2M hugepage size + */ +#define HUGETLB_SIZE (16 << 20) +#else +#define PAGE_SIZE (4 << 10) +#define HUGETLB_SIZE (2 << 20) +#endif + +/* + * >= 128TB is the hint addr value we used to select + * large address space. + */ +#define ADDR_SWITCH_HINT (1UL << 47) +#define LOW_ADDR ((void *) (1UL << 30)) +#define HIGH_ADDR ((void *) (1UL << 48)) + +struct testcase { + void *addr; + unsigned long size; + unsigned long flags; + const char *msg; + unsigned int low_addr_required:1; + unsigned int keep_mapped:1; +}; + +static struct testcase testcases[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + /* + * We should never allocate at the requested address or above it + * The len cross the 128TB boundary. Without MAP_FIXED + * we will always search in the lower address space. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at 128TB, the area is free we should get that + * even without MAP_FIXED. + */ + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, +}; + +static struct testcase hugetlb_testcases[] = { + { + .addr = NULL, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", + }, +}; + +static int run_test(struct testcase *test, int count) +{ + void *p; + int i, ret = 0; + + for (i = 0; i < count; i++) { + struct testcase *t = test + i; + + p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0); + + printf("%s: %p - ", t->msg, p); + + if (p == MAP_FAILED) { + printf("FAILED\n"); + ret = 1; + continue; + } + + if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + printf("FAILED\n"); + ret = 1; + } else { + /* + * Do a dereference of the address returned so that we catch + * bugs in page fault handling + */ + memset(p, 0, t->size); + printf("OK\n"); + } + if (!t->keep_mapped) + munmap(p, t->size); + } + + return ret; +} + +static int supported_arch(void) +{ +#if defined(__powerpc64__) + return 1; +#elif defined(__x86_64__) + return 1; +#else + return 0; +#endif +} + +int main(int argc, char **argv) +{ + int ret; + + if (!supported_arch()) + return 0; + + ret = run_test(testcases, ARRAY_SIZE(testcases)); + if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) + ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + return ret; +} diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c deleted file mode 100644 index 2eafdcd4c2b3..000000000000 --- a/tools/testing/selftests/x86/5lvl.c +++ /dev/null @@ -1,177 +0,0 @@ -#include -#include - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -#define PAGE_SIZE 4096 -#define LOW_ADDR ((void *) (1UL << 30)) -#define HIGH_ADDR ((void *) (1UL << 50)) - -struct testcase { - void *addr; - unsigned long size; - unsigned long flags; - const char *msg; - unsigned int low_addr_required:1; - unsigned int keep_mapped:1; -}; - -static struct testcase testcases[] = { - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - (2UL << 20)), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)", - }, -}; - -int main(int argc, char **argv) -{ - int i; - void *p; - - for (i = 0; i < ARRAY_SIZE(testcases); i++) { - struct testcase *t = testcases + i; - - p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0); - - printf("%s: %p - ", t->msg, p); - - if (p == MAP_FAILED) { - printf("FAILED\n"); - continue; - } - - if (t->low_addr_required && p >= (void *)(1UL << 47)) - printf("FAILED\n"); - else - printf("OK\n"); - if (!t->keep_mapped) - munmap(p, t->size); - } - return 0; -} -- cgit v1.2.3 From 724978457fe29626bb6ce78e6c324c9d2648dcf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 31 Jan 2018 16:19:32 -0800 Subject: memfd-test: test hugetlbfs sealing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove most of the special-casing of hugetlbfs now that sealing is supported. Link: http://lkml.kernel.org/r/20171107122800.25517-7-marcandre.lureau@redhat.com Signed-off-by: Marc-André Lureau Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Michal Hocko Cc: David Herrmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/memfd_test.c | 150 +++-------------------------- 1 file changed, 15 insertions(+), 135 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 132a54f74e88..59ca090e9752 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -513,6 +513,10 @@ static void mfd_assert_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -533,6 +537,10 @@ static void mfd_fail_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -627,18 +635,13 @@ static void test_create(void) fd = mfd_assert_new("", 0, MFD_CLOEXEC); close(fd); - if (!hugetlbfs_test) { - /* verify MFD_ALLOW_SEALING is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); - close(fd); - - /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); - close(fd); - } else { - /* sealing is not supported on hugetlbfs */ - mfd_fail_new("", MFD_ALLOW_SEALING); - } + /* verify MFD_ALLOW_SEALING is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); + close(fd); + + /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); + close(fd); } /* @@ -649,10 +652,6 @@ static void test_basic(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s BASIC\n", MEMFD_STR); fd = mfd_assert_new("kern_memfd_basic", @@ -697,28 +696,6 @@ static void test_basic(void) close(fd); } -/* - * hugetlbfs doesn't support seals or write, so just verify grow and shrink - * on a hugetlbfs file created via memfd_create. - */ -static void test_hugetlbfs_grow_shrink(void) -{ - int fd; - - printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR); - - fd = mfd_assert_new("kern_memfd_seal_write", - mfd_def_size, - MFD_CLOEXEC); - - mfd_assert_read(fd); - mfd_assert_write(fd); - mfd_assert_shrink(fd); - mfd_assert_grow(fd); - - close(fd); -} - /* * Test SEAL_WRITE * Test whether SEAL_WRITE actually prevents modifications. @@ -727,13 +704,6 @@ static void test_seal_write(void) { int fd; - /* - * hugetlbfs does not contain sealing or write support. Just test - * basic grow and shrink via test_hugetlbfs_grow_shrink. - */ - if (hugetlbfs_test) - return test_hugetlbfs_grow_shrink(); - printf("%s SEAL-WRITE\n", MEMFD_STR); fd = mfd_assert_new("kern_memfd_seal_write", @@ -760,10 +730,6 @@ static void test_seal_shrink(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s SEAL-SHRINK\n", MEMFD_STR); fd = mfd_assert_new("kern_memfd_seal_shrink", @@ -790,10 +756,6 @@ static void test_seal_grow(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s SEAL-GROW\n", MEMFD_STR); fd = mfd_assert_new("kern_memfd_seal_grow", @@ -820,10 +782,6 @@ static void test_seal_resize(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s SEAL-RESIZE\n", MEMFD_STR); fd = mfd_assert_new("kern_memfd_seal_resize", @@ -842,32 +800,6 @@ static void test_seal_resize(void) close(fd); } -/* - * hugetlbfs does not support seals. Basic test to dup the memfd created - * fd and perform some basic operations on it. - */ -static void hugetlbfs_dup(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_dup", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_dup(fd); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - /* * Test sharing via dup() * Test that seals are shared between dupped FDs and they're all equal. @@ -876,15 +808,6 @@ static void test_share_dup(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. Perform some - * basic testing on dup'ed fd instead via hugetlbfs_dup. - */ - if (hugetlbfs_test) { - hugetlbfs_dup(b_suffix); - return; - } - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_dup", @@ -927,10 +850,6 @@ static void test_share_mmap(char *banner, char *b_suffix) int fd; void *p; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_mmap", @@ -955,32 +874,6 @@ static void test_share_mmap(char *banner, char *b_suffix) close(fd); } -/* - * Basic test to make sure we can open the hugetlbfs fd via /proc and - * perform some simple operations on it. - */ -static void hugetlbfs_proc_open(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_open", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_open(fd, O_RDWR, 0); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - /* * Test sealing with open(/proc/self/fd/%d) * Via /proc we can get access to a separate file-context for the same memfd. @@ -991,15 +884,6 @@ static void test_share_open(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. So test basic - * functionality of using /proc fd via hugetlbfs_proc_open - */ - if (hugetlbfs_test) { - hugetlbfs_proc_open(b_suffix); - return; - } - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_open", @@ -1043,10 +927,6 @@ static void test_share_fork(char *banner, char *b_suffix) int fd; pid_t pid; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_fork", -- cgit v1.2.3 From 3037aeb99134b0907fe0901055570a329b1f583d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 31 Jan 2018 16:19:36 -0800 Subject: memfd-test: add 'memfd-hugetlb:' prefix when testing hugetlbfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: http://lkml.kernel.org/r/20171107122800.25517-8-marcandre.lureau@redhat.com Suggested-by: Mike Kravetz Signed-off-by: Marc-André Lureau Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Michal Hocko Cc: David Herrmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/memfd_test.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 59ca090e9752..910c55f858bb 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -20,6 +20,7 @@ #include #define MEMFD_STR "memfd:" +#define MEMFD_HUGE_STR "memfd-hugetlb:" #define SHARED_FT_STR "(shared file-table)" #define MFD_DEF_SIZE 8192 @@ -30,6 +31,7 @@ */ static int hugetlbfs_test; static size_t mfd_def_size = MFD_DEF_SIZE; +static const char *memfd_str = MEMFD_STR; /* * Copied from mlock2-tests.c @@ -606,7 +608,7 @@ static void test_create(void) char buf[2048]; int fd; - printf("%s CREATE\n", MEMFD_STR); + printf("%s CREATE\n", memfd_str); /* test NULL name */ mfd_fail_new(NULL, 0); @@ -652,7 +654,7 @@ static void test_basic(void) { int fd; - printf("%s BASIC\n", MEMFD_STR); + printf("%s BASIC\n", memfd_str); fd = mfd_assert_new("kern_memfd_basic", mfd_def_size, @@ -704,7 +706,7 @@ static void test_seal_write(void) { int fd; - printf("%s SEAL-WRITE\n", MEMFD_STR); + printf("%s SEAL-WRITE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_write", mfd_def_size, @@ -730,7 +732,7 @@ static void test_seal_shrink(void) { int fd; - printf("%s SEAL-SHRINK\n", MEMFD_STR); + printf("%s SEAL-SHRINK\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_shrink", mfd_def_size, @@ -756,7 +758,7 @@ static void test_seal_grow(void) { int fd; - printf("%s SEAL-GROW\n", MEMFD_STR); + printf("%s SEAL-GROW\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_grow", mfd_def_size, @@ -782,7 +784,7 @@ static void test_seal_resize(void) { int fd; - printf("%s SEAL-RESIZE\n", MEMFD_STR); + printf("%s SEAL-RESIZE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_resize", mfd_def_size, @@ -808,7 +810,7 @@ static void test_share_dup(char *banner, char *b_suffix) { int fd, fd2; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_dup", mfd_def_size, @@ -850,7 +852,7 @@ static void test_share_mmap(char *banner, char *b_suffix) int fd; void *p; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_mmap", mfd_def_size, @@ -884,7 +886,7 @@ static void test_share_open(char *banner, char *b_suffix) { int fd, fd2; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_open", mfd_def_size, @@ -927,7 +929,7 @@ static void test_share_fork(char *banner, char *b_suffix) int fd; pid_t pid; - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_fork", mfd_def_size, @@ -963,7 +965,11 @@ int main(int argc, char **argv) } hugetlbfs_test = 1; + memfd_str = MEMFD_HUGE_STR; mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[1]); + abort(); } } -- cgit v1.2.3 From 29f34d1dd6657a0c7da875deb57775c67ff6bd86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 31 Jan 2018 16:19:40 -0800 Subject: memfd-test: move common code to a shared unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memfd & fuse tests will share more common code in the following commits to test hugetlb support. Link: http://lkml.kernel.org/r/20171107122800.25517-9-marcandre.lureau@redhat.com Signed-off-by: Marc-André Lureau Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Michal Hocko Cc: David Herrmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/Makefile | 5 ++++ tools/testing/selftests/memfd/common.c | 46 ++++++++++++++++++++++++++++++ tools/testing/selftests/memfd/common.h | 9 ++++++ tools/testing/selftests/memfd/fuse_test.c | 8 ++---- tools/testing/selftests/memfd/memfd_test.c | 36 ++--------------------- 5 files changed, 64 insertions(+), 40 deletions(-) create mode 100644 tools/testing/selftests/memfd/common.c create mode 100644 tools/testing/selftests/memfd/common.h (limited to 'tools') diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 3926a0409dda..a5276a91dfbf 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -12,3 +12,8 @@ fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) include ../lib.mk $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs) + +$(OUTPUT)/memfd_test: memfd_test.c common.o +$(OUTPUT)/fuse_test: fuse_test.c common.o + +EXTRA_CLEAN = common.o diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c new file mode 100644 index 000000000000..8eb3d75f6e60 --- /dev/null +++ b/tools/testing/selftests/memfd/common.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include +#include +#include +#include +#include +#include + +#include "common.h" + +int hugetlbfs_test = 0; + +/* + * Copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +int sys_memfd_create(const char *name, unsigned int flags) +{ + if (hugetlbfs_test) + flags |= MFD_HUGETLB; + + return syscall(__NR_memfd_create, name, flags); +} diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h new file mode 100644 index 000000000000..522d2c630bd8 --- /dev/null +++ b/tools/testing/selftests/memfd/common.h @@ -0,0 +1,9 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +extern int hugetlbfs_test; + +unsigned long default_huge_page_size(void); +int sys_memfd_create(const char *name, unsigned int flags); + +#endif diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 1ccb7a3eb14b..795a25ba8521 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -33,15 +33,11 @@ #include #include +#include "common.h" + #define MFD_DEF_SIZE 8192 #define STACK_SIZE 65536 -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { int r, fd; diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 910c55f858bb..10baa1652fc2 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -19,6 +19,8 @@ #include #include +#include "common.h" + #define MEMFD_STR "memfd:" #define MEMFD_HUGE_STR "memfd-hugetlb:" #define SHARED_FT_STR "(shared file-table)" @@ -29,43 +31,9 @@ /* * Default is not to test hugetlbfs */ -static int hugetlbfs_test; static size_t mfd_def_size = MFD_DEF_SIZE; static const char *memfd_str = MEMFD_STR; -/* - * Copied from mlock2-tests.c - */ -static unsigned long default_huge_page_size(void) -{ - unsigned long hps = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return 0; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { - hps <<= 10; - break; - } - } - - free(line); - fclose(f); - return hps; -} - -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - if (hugetlbfs_test) - flags |= MFD_HUGETLB; - - return syscall(__NR_memfd_create, name, flags); -} - static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { int r, fd; -- cgit v1.2.3 From c5c63835e5713b09fc974241db47956362a63efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 31 Jan 2018 16:19:44 -0800 Subject: memfd-test: run fuse test on hugetlb backend memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: http://lkml.kernel.org/r/20171107122800.25517-10-marcandre.lureau@redhat.com Signed-off-by: Marc-André Lureau Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Michal Hocko Cc: David Herrmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/fuse_test.c | 38 ++++++++++++++++++++------ tools/testing/selftests/memfd/run_fuse_test.sh | 2 +- tools/testing/selftests/memfd/run_tests.sh | 1 + 3 files changed, 32 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 795a25ba8521..b018e835737d 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -38,6 +38,8 @@ #define MFD_DEF_SIZE 8192 #define STACK_SIZE 65536 +static size_t mfd_def_size = MFD_DEF_SIZE; + static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { int r, fd; @@ -123,7 +125,7 @@ static void *mfd_assert_mmap_shared(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -141,7 +143,7 @@ static void *mfd_assert_mmap_private(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, @@ -174,7 +176,7 @@ static int sealing_thread_fn(void *arg) usleep(200000); /* unmount mapping before sealing to avoid i_mmap_writable failures */ - munmap(global_p, MFD_DEF_SIZE); + munmap(global_p, mfd_def_size); /* Try sealing the global file; expect EBUSY or success. Current * kernels will never succeed, but in the future, kernels might @@ -224,7 +226,7 @@ static void join_sealing_thread(pid_t pid) int main(int argc, char **argv) { - static const char zero[MFD_DEF_SIZE]; + char *zero; int fd, mfd, r; void *p; int was_sealed; @@ -235,6 +237,25 @@ int main(int argc, char **argv) abort(); } + if (argc >= 3) { + if (!strcmp(argv[2], "hugetlbfs")) { + unsigned long hpage_size = default_huge_page_size(); + + if (!hpage_size) { + printf("Unable to determine huge page size\n"); + abort(); + } + + hugetlbfs_test = 1; + mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[2]); + abort(); + } + } + + zero = calloc(sizeof(*zero), mfd_def_size); + /* open FUSE memfd file for GUP testing */ printf("opening: %s\n", argv[1]); fd = open(argv[1], O_RDONLY | O_CLOEXEC); @@ -245,7 +266,7 @@ int main(int argc, char **argv) /* create new memfd-object */ mfd = mfd_assert_new("kern_memfd_fuse", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); /* mmap memfd-object for writing */ @@ -264,7 +285,7 @@ int main(int argc, char **argv) * This guarantees that the receive-buffer is pinned for 1s until the * data is written into it. The racing ADD_SEALS should thus fail as * the pages are still pinned. */ - r = read(fd, p, MFD_DEF_SIZE); + r = read(fd, p, mfd_def_size); if (r < 0) { printf("read() failed: %m\n"); abort(); @@ -291,10 +312,10 @@ int main(int argc, char **argv) * enough to avoid any in-flight writes. */ p = mfd_assert_mmap_private(mfd); - if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) { + if (was_sealed && memcmp(p, zero, mfd_def_size)) { printf("memfd sealed during read() but data not discarded\n"); abort(); - } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) { + } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) { printf("memfd sealed after read() but data discarded\n"); abort(); } @@ -303,6 +324,7 @@ int main(int argc, char **argv) close(fd); printf("fuse: DONE\n"); + free(zero); return 0; } diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh index 407df68dfe27..22e572e2d66a 100755 --- a/tools/testing/selftests/memfd/run_fuse_test.sh +++ b/tools/testing/selftests/memfd/run_fuse_test.sh @@ -10,6 +10,6 @@ set -e mkdir mnt ./fuse_mnt ./mnt -./fuse_test ./mnt/memfd +./fuse_test ./mnt/memfd $@ fusermount -u ./mnt rmdir ./mnt diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh index daabb350697c..c2d41ed81b24 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_tests.sh @@ -60,6 +60,7 @@ fi # Run the hugetlbfs test # ./memfd_test hugetlbfs +./run_fuse_test.sh hugetlbfs # # Give back any huge pages allocated for the test -- cgit v1.2.3 From c7905f200225d4257536f19b11d18f598fee5f44 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 31 Jan 2018 16:21:23 -0800 Subject: tools, vm: new option to specify kpageflags file page-types currently hardcodes /proc/kpageflags as the file to parse. This works when using the tool to examine the state of pageflags on the same system, but does not allow storing a snapshot of pageflags at a given time to debug issues nor on a different system. This allows the user to specify a saved version of kpageflags with a new page-types -F option. [akpm@linux-foundation.org: add "filename" to fix usage() string] [rientjes@google.com: fix layout] Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1801301840050.140969@chino.kir.corp.google.com Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1801301458180.153857@chino.kir.corp.google.com Signed-off-by: David Rientjes Reviewed-by: Andrew Morton Reviewed-by: Naoya Horiguchi Cc: Konstantin Khlebnikov Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/page-types.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index e92903fc7113..a8783f48f77f 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -169,9 +169,10 @@ static int opt_raw; /* for kernel developers */ static int opt_list; /* list pages (in ranges) */ static int opt_no_summary; /* don't show summary */ static pid_t opt_pid; /* process to walk */ -const char * opt_file; /* file or directory path */ +const char *opt_file; /* file or directory path */ static uint64_t opt_cgroup; /* cgroup inode */ static int opt_list_cgroup;/* list page cgroup */ +static const char *opt_kpageflags;/* kpageflags file to parse */ #define MAX_ADDR_RANGES 1024 static int nr_addr_ranges; @@ -258,7 +259,7 @@ static int checked_open(const char *pathname, int flags) * pagemap/kpageflags routines */ -static unsigned long do_u64_read(int fd, char *name, +static unsigned long do_u64_read(int fd, const char *name, uint64_t *buf, unsigned long index, unsigned long count) @@ -283,7 +284,7 @@ static unsigned long kpageflags_read(uint64_t *buf, unsigned long index, unsigned long pages) { - return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); + return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages); } static unsigned long kpagecgroup_read(uint64_t *buf, @@ -293,7 +294,7 @@ static unsigned long kpagecgroup_read(uint64_t *buf, if (kpagecgroup_fd < 0) return pages; - return do_u64_read(kpagecgroup_fd, PROC_KPAGEFLAGS, buf, index, pages); + return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages); } static unsigned long pagemap_read(uint64_t *buf, @@ -743,7 +744,7 @@ static void walk_addr_ranges(void) { int i; - kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); if (!nr_addr_ranges) add_addr_range(0, ULONG_MAX); @@ -790,6 +791,7 @@ static void usage(void) " -N|--no-summary Don't show summary info\n" " -X|--hwpoison hwpoison pages\n" " -x|--unpoison unpoison pages\n" +" -F|--kpageflags filename kpageflags file to parse\n" " -h|--help Show this usage message\n" "flags:\n" " 0x10 bitfield format, e.g.\n" @@ -1013,7 +1015,7 @@ static void walk_page_cache(void) { struct stat st; - kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY); sigaction(SIGBUS, &sigbus_action, NULL); @@ -1164,6 +1166,11 @@ static void parse_bits_mask(const char *optarg) add_bits_filter(mask, bits); } +static void parse_kpageflags(const char *name) +{ + opt_kpageflags = name; +} + static void describe_flags(const char *optarg) { uint64_t flags = parse_flag_names(optarg, 0); @@ -1188,6 +1195,7 @@ static const struct option opts[] = { { "no-summary", 0, NULL, 'N' }, { "hwpoison" , 0, NULL, 'X' }, { "unpoison" , 0, NULL, 'x' }, + { "kpageflags", 0, NULL, 'F' }, { "help" , 0, NULL, 'h' }, { NULL , 0, NULL, 0 } }; @@ -1199,7 +1207,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); while ((c = getopt_long(argc, argv, - "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) { + "rp:f:a:b:d:c:ClLNXxF:h", opts, NULL)) != -1) { switch (c) { case 'r': opt_raw = 1; @@ -1242,6 +1250,9 @@ int main(int argc, char *argv[]) opt_unpoison = 1; prepare_hwpoison_fd(); break; + case 'F': + parse_kpageflags(optarg); + break; case 'h': usage(); exit(0); @@ -1251,6 +1262,9 @@ int main(int argc, char *argv[]) } } + if (!opt_kpageflags) + opt_kpageflags = PROC_KPAGEFLAGS; + if (opt_cgroup || opt_list_cgroup) kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); -- cgit v1.2.3 From 62a06994ced17f295fc51ea0815580ee7ccb668d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 29 Jan 2018 21:23:28 -0800 Subject: tools/bpf: permit selftests/bpf to be built in a different directory Fix a couple of issues at tools/testing/selftests/bpf/Makefile so the following command make -C tools/testing/selftests/bpf OUTPUT=/home/yhs/tmp can put the built results into a different directory. Also add the built binary test_tcpbpf_user in the .gitignore file. Fixes: 6882804c916b ("selftests/bpf: add a test for overlapping packet range checks") Fixes: 9d1f15941967 ("bpf: move cgroup_helpers from samples/bpf/ to tools/testing/selftesting/bpf/") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1e09d77f1948..cc15af2e54fe 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -8,5 +8,6 @@ fixdep test_align test_dev_cgroup test_progs +test_tcpbpf_user test_verifier_log feature diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bf05bc5e36e5..566d6adc172a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,7 @@ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c +BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) @@ -58,7 +58,7 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline -%.o: %.c +$(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ -- cgit v1.2.3 From 03ee47ae8a7c608975be7e45287bff0482e295d6 Mon Sep 17 00:00:00 2001 From: Peter Malone Date: Thu, 16 Feb 2017 15:42:26 -0500 Subject: ringtest: ring.c malloc & memset to calloc Code cleanup change - moving from malloc & memset to calloc. Signed-off-by: Peter Malone Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/ring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c index 2b9859beea65..5a41404aaef5 100644 --- a/tools/virtio/ringtest/ring.c +++ b/tools/virtio/ringtest/ring.c @@ -84,12 +84,11 @@ void alloc_ring(void) perror("Unable to allocate ring buffer.\n"); exit(3); } - event = malloc(sizeof *event); + event = calloc(1, sizeof(*event)); if (!event) { perror("Unable to allocate event buffer.\n"); exit(3); } - memset(event, 0, sizeof *event); guest.avail_idx = 0; guest.kicked_avail_idx = -1; guest.last_used_idx = 0; @@ -102,12 +101,11 @@ void alloc_ring(void) ring[i] = desc; } guest.num_free = ring_size; - data = malloc(ring_size * sizeof *data); + data = calloc(ring_size, sizeof(*data)); if (!data) { perror("Unable to allocate data buffer.\n"); exit(3); } - memset(data, 0, ring_size * sizeof *data); } /* guest side */ -- cgit v1.2.3 From 65073a67331de3d2cce35607807ddec284e75e81 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 31 Jan 2018 12:58:56 +0100 Subject: bpf: fix null pointer deref in bpf_prog_test_run_xdp syzkaller was able to generate the following XDP program ... (18) r0 = 0x0 (61) r5 = *(u32 *)(r1 +12) (04) (u32) r0 += (u32) 0 (95) exit ... and trigger a NULL pointer dereference in ___bpf_prog_run() via bpf_prog_test_run_xdp() where this was attempted to run. Reason is that recent xdp_rxq_info addition to XDP programs updated all drivers, but not bpf_prog_test_run_xdp(), where xdp_buff is set up. Thus when context rewriter does the deref on the netdev it's NULL at runtime. Fix it by using xdp_rxq from loopback dev. __netif_get_rx_queue() helper can also be reused in various other locations later on. Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs") Reported-by: syzbot+1eb094057b338eb1fc00@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Jesper Dangaard Brouer Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 697bd83de295..c0f16e93f9bd 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7779,6 +7779,20 @@ static struct bpf_test tests[] = { .errstr = "unknown opcode d7", .result = REJECT, }, + { + "XDP, using ifindex from netdev", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, ingress_ifindex)), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 1, + }, { "meta access, test1", .insns = { -- cgit v1.2.3 From f81e1d35a6e36d30888c46283b8dd1022e847124 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 31 Jan 2018 12:45:55 -0700 Subject: nfit-test: Add platform cap support from ACPI 6.2a to test Adding NFIT platform capabilities sub table in nfit_test simulated ACPI NFIT table. Only the first NFIT table is added with the capability sub-table. Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Signed-off-by: Ross Zwisler --- tools/testing/nvdimm/test/nfit.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 7217b2b953b5..de1373a7ed4f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -881,7 +881,8 @@ static int nfit_test0_alloc(struct nfit_test *t) window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + (sizeof(struct acpi_nfit_flush_address) - + sizeof(u64) * NUM_HINTS) * NUM_DCR; + + sizeof(u64) * NUM_HINTS) * NUM_DCR + + sizeof(struct acpi_nfit_capabilities); int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -993,6 +994,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; + struct acpi_nfit_capabilities *pcap; unsigned int offset, i; /* @@ -1500,8 +1502,16 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + /* platform capabilities */ + pcap = nfit_buf + offset + flush_hint_size * 4; + pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; + pcap->header.length = sizeof(*pcap); + pcap->highest_capability = 1; + pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | + ACPI_NFIT_CAPABILITY_MEM_FLUSH; + if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4; + offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; -- cgit v1.2.3 From bfbaa952d1232c6199cdeb4896da67e02a13326d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 1 Feb 2018 17:41:57 -0700 Subject: libnvdimm/nfit_test: add firmware download emulation Adding support in nfit_test for DSM v1.6 firmware update sequence. The test will simulate the flashing of firmware to the DIMM. A bogus version string will be returned as the test has no idea how to parse the firmware binary. Any bogus binary can be used to "update" as the actual binary is not copied into the kernel. Signed-off-by: Dave Jiang Reviewed-by: Vishal Verma [ vishal: also move smart calls into the nd_cmd_call block ] Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 322 +++++++++++++++++++++++++++++++--- tools/testing/nvdimm/test/nfit_test.h | 66 ++++++- 2 files changed, 360 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 2b57254342aa..a043fea4d58d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -137,6 +137,14 @@ static u32 handle[] = { static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +struct nfit_test_fw { + enum intel_fw_update_state state; + u32 context; + u64 version; + u32 size_received; + u64 end_time; +}; + struct nfit_test { struct acpi_nfit_desc acpi_desc; struct platform_device pdev; @@ -172,6 +180,7 @@ struct nfit_test { struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; struct work_struct work; + struct nfit_test_fw *fw; }; static struct workqueue_struct *nfit_wq; @@ -183,6 +192,226 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nd_intel_test_get_fw_info(struct nfit_test *t, + struct nd_intel_fw_info *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE; + nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN; + nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL; + nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME; + nd_cmd->update_cap = 0; + nd_cmd->fis_version = INTEL_FW_FIS_VERSION; + nd_cmd->run_version = 0; + nd_cmd->updated_version = fw->version; + + return 0; +} + +static int nd_intel_test_start_update(struct nfit_test *t, + struct nd_intel_fw_start *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (fw->state != FW_STATE_NEW) { + /* extended status, FW update in progress */ + nd_cmd->status = 0x10007; + return 0; + } + + fw->state = FW_STATE_IN_PROGRESS; + fw->context++; + fw->size_received = 0; + nd_cmd->status = 0; + nd_cmd->context = fw->context; + + dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context); + + return 0; +} + +static int nd_intel_test_send_data(struct nfit_test *t, + struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + + dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status); + dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]); + dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1, + nd_cmd->data[nd_cmd->length-1]); + + if (fw->state != FW_STATE_IN_PROGRESS) { + dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__); + *status = 0x5; + return 0; + } + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + *status = 0x10007; + return 0; + } + + /* + * check offset + len > size of fw storage + * check length is > max send length + */ + if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE || + nd_cmd->length > INTEL_FW_MAX_SEND_LEN) { + *status = 0x3; + dev_dbg(dev, "%s: buffer boundary violation\n", __func__); + return 0; + } + + fw->size_received += nd_cmd->length; + dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n", + __func__, nd_cmd->length, fw->size_received); + *status = 0; + return 0; +} + +static int nd_intel_test_finish_fw(struct nfit_test *t, + struct nd_intel_fw_finish_update *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (fw->state == FW_STATE_UPDATED) { + /* update already done, need cold boot */ + nd_cmd->status = 0x20007; + return 0; + } + + dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n", + __func__, nd_cmd->context, nd_cmd->ctrl_flags); + + switch (nd_cmd->ctrl_flags) { + case 0: /* finish */ + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, + fw->context); + nd_cmd->status = 0x10007; + return 0; + } + nd_cmd->status = 0; + fw->state = FW_STATE_VERIFY; + /* set 1 second of time for firmware "update" */ + fw->end_time = jiffies + HZ; + break; + + case 1: /* abort */ + fw->size_received = 0; + /* successfully aborted status */ + nd_cmd->status = 0x40007; + fw->state = FW_STATE_NEW; + dev_dbg(dev, "%s: abort successful\n", __func__); + break; + + default: /* bad control flag */ + dev_warn(dev, "%s: unknown control flag: %#x\n", + __func__, nd_cmd->ctrl_flags); + return -EINVAL; + } + + return 0; +} + +static int nd_intel_test_finish_query(struct nfit_test *t, + struct nd_intel_fw_finish_query *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + nd_cmd->status = 0x10007; + return 0; + } + + dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context); + + switch (fw->state) { + case FW_STATE_NEW: + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0; + dev_dbg(dev, "%s: new state\n", __func__); + break; + + case FW_STATE_IN_PROGRESS: + /* sequencing error */ + nd_cmd->status = 0x40007; + nd_cmd->updated_fw_rev = 0; + dev_dbg(dev, "%s: sequence error\n", __func__); + break; + + case FW_STATE_VERIFY: + if (time_is_after_jiffies64(fw->end_time)) { + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0x20007; + dev_dbg(dev, "%s: still verifying\n", __func__); + break; + } + + dev_dbg(dev, "%s: transition out verify\n", __func__); + fw->state = FW_STATE_UPDATED; + /* we are going to fall through if it's "done" */ + case FW_STATE_UPDATED: + nd_cmd->status = 0; + /* bogus test version */ + fw->version = nd_cmd->updated_fw_rev = + INTEL_FW_FAKE_VERSION; + dev_dbg(dev, "%s: updated\n", __func__); + break; + + default: /* we should never get here */ + return -EINVAL; + } + + return 0; +} + static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, unsigned int buf_len) { @@ -592,6 +821,23 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, return 0; } +static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) +{ + int i; + + /* lookup per-dimm data */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + if ((1 << func) & dimm_fail_cmd_flags[i]) + return -EIO; + + return i; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -620,22 +866,54 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, func = call_pkg->nd_command; if (call_pkg->nd_family != nfit_mem->family) return -ENOTTY; + + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; + + switch (func) { + case ND_INTEL_FW_GET_INFO: + return nd_intel_test_get_fw_info(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_START_UPDATE: + return nd_intel_test_start_update(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_SEND_DATA: + return nd_intel_test_send_data(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_UPDATE: + return nd_intel_test_finish_fw(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_QUERY: + return nd_intel_test_finish_query(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_SMART: + return nfit_test_cmd_smart(buf, buf_len, + &t->smart[i - t->dcr_idx]); + case ND_INTEL_SMART_THRESHOLD: + return nfit_test_cmd_smart_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx]); + case ND_INTEL_SMART_SET_THRESHOLD: + return nfit_test_cmd_smart_set_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); + default: + return -ENOTTY; + } } if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; - /* lookup per-dimm data */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == - handle[i]) - break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; - - if ((1 << func) & dimm_fail_cmd_flags[i]) - return -EIO; + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; switch (func) { case ND_CMD_GET_CONFIG_SIZE: @@ -649,20 +927,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i - t->dcr_idx]); break; - case ND_INTEL_SMART: - rc = nfit_test_cmd_smart(buf, buf_len, - &t->smart[i - t->dcr_idx]); - break; - case ND_INTEL_SMART_THRESHOLD: - rc = nfit_test_cmd_smart_threshold(buf, buf_len, - &t->smart_threshold[i - t->dcr_idx]); - break; - case ND_INTEL_SMART_SET_THRESHOLD: - rc = nfit_test_cmd_smart_set_threshold(buf, buf_len, - &t->smart_threshold[i - t->dcr_idx], - &t->smart[i - t->dcr_idx], - &t->pdev.dev, t->dimm_dev[i]); - break; default: return -ENOTTY; } @@ -1728,6 +1992,11 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -2134,10 +2403,13 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->smart_threshold = devm_kcalloc(dev, num, sizeof(struct nd_intel_smart_threshold), GFP_KERNEL); + nfit_test->fw = devm_kcalloc(dev, num, + sizeof(struct nfit_test_fw), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr && nfit_test->dcr_dma && nfit_test->flush - && nfit_test->flush_dma) + && nfit_test->flush_dma + && nfit_test->fw) /* pass */; else return -ENOMEM; diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index ba230f6f7676..be8fa8ec0615 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -84,9 +84,14 @@ struct nd_cmd_ars_err_inj_stat { } __packed record[0]; } __packed; -#define ND_INTEL_SMART 1 -#define ND_INTEL_SMART_THRESHOLD 2 -#define ND_INTEL_SMART_SET_THRESHOLD 17 +#define ND_INTEL_SMART 1 +#define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_FW_GET_INFO 12 +#define ND_INTEL_FW_START_UPDATE 13 +#define ND_INTEL_FW_SEND_DATA 14 +#define ND_INTEL_FW_FINISH_UPDATE 15 +#define ND_INTEL_FW_FINISH_QUERY 16 +#define ND_INTEL_SMART_SET_THRESHOLD 17 #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) #define ND_INTEL_SMART_SPARES_VALID (1 << 1) @@ -152,6 +157,61 @@ struct nd_intel_smart_set_threshold { __u32 status; } __packed; +#define INTEL_FW_STORAGE_SIZE 0x100000 +#define INTEL_FW_MAX_SEND_LEN 0xFFEC +#define INTEL_FW_QUERY_INTERVAL 250000 +#define INTEL_FW_QUERY_MAX_TIME 3000000 +#define INTEL_FW_FIS_VERSION 0x0105 +#define INTEL_FW_FAKE_VERSION 0xffffffffabcd + +enum intel_fw_update_state { + FW_STATE_NEW = 0, + FW_STATE_IN_PROGRESS, + FW_STATE_VERIFY, + FW_STATE_UPDATED, +}; + +struct nd_intel_fw_info { + __u32 status; + __u32 storage_size; + __u32 max_send_len; + __u32 query_interval; + __u32 max_query_time; + __u8 update_cap; + __u8 reserved[3]; + __u32 fis_version; + __u64 run_version; + __u64 updated_version; +} __packed; + +struct nd_intel_fw_start { + __u32 status; + __u32 context; +} __packed; + +/* this one has the output first because the variable input data size */ +struct nd_intel_fw_send_data { + __u32 context; + __u32 offset; + __u32 length; + __u8 data[0]; +/* this field is not declared due ot variable data from input */ +/* __u32 status; */ +} __packed; + +struct nd_intel_fw_finish_update { + __u8 ctrl_flags; + __u8 reserved[3]; + __u32 context; + __u32 status; +} __packed; + +struct nd_intel_fw_finish_query { + __u32 context; + __u32 status; + __u64 updated_fw_rev; +} __packed; + union acpi_object; typedef void *acpi_handle; -- cgit v1.2.3 From 674d8bdec770d40288574534eab27d82bdf16b0e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 1 Feb 2018 17:41:58 -0700 Subject: libnvdimm/nfit_test: adding support for unit testing enable LSS status Adding support code to simulate the enabling of LSS status in support of the Intel DSM v1.6 Function Index 10: Enable Latch System Shutdown Status. This is only for testing of libndctl support for LSS enable. The actual functionality requires a reboot and therefore is not simulated. The enable value is not recorded in nfit_test since there's no DSM to actually query the current status of the LSS enable. Signed-off-by: Dave Jiang Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 34 ++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/test/nfit_test.h | 6 ++++++ 2 files changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a043fea4d58d..42ee1798971d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -821,6 +821,35 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, return 0; } +static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t, + struct nd_intel_lss *nd_cmd, unsigned int buf_len) +{ + struct device *dev = &t->pdev.dev; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + switch (nd_cmd->enable) { + case 0: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n", + __func__); + break; + case 1: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n", + __func__); + break; + default: + dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable); + nd_cmd->status = 0x3; + break; + } + + + return 0; +} + static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) { int i; @@ -872,6 +901,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, return i; switch (func) { + case ND_INTEL_ENABLE_LSS_STATUS: + return nd_intel_test_cmd_set_lss_status(t, + buf, buf_len); case ND_INTEL_FW_GET_INFO: return nd_intel_test_get_fw_info(t, buf, buf_len, i - t->dcr_idx); @@ -1997,6 +2029,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -2094,6 +2127,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index be8fa8ec0615..428344519cdf 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -86,6 +86,7 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART 1 #define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_ENABLE_LSS_STATUS 10 #define ND_INTEL_FW_GET_INFO 12 #define ND_INTEL_FW_START_UPDATE 13 #define ND_INTEL_FW_SEND_DATA 14 @@ -212,6 +213,11 @@ struct nd_intel_fw_finish_query { __u64 updated_fw_rev; } __packed; +struct nd_intel_lss { + __u8 enable; + __u32 status; +} __packed; + union acpi_object; typedef void *acpi_handle; -- cgit v1.2.3 From 0fb5c8df609eaca3cb7c24e7f91470f8dd5984ec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Feb 2018 12:28:54 -0800 Subject: tools/testing/nvdimm: force nfit_test to depend on instrumented modules The libnvdimm unit tests will fail when they are run against the production / in-tree version of libnvdimm.ko or nfit.ko due to symbols not being mocked per nfit_test's expectation. For example, nfit_test expects acpi_evaluate_dsm() to be replaced by __wrap_acpi_evaluate_dsm() to test how acpi_nfit_ctl() responds to different stimuli. Create a test-only symbol name that nfit_test links against to cause module load failures when the wrong module is present. For example, with this change, attempts to use the wrong module will report: nfit_test: Unknown symbol libnvdimm_test (err 0) Reported-by: Dave Jiang Reported-by: Vishal Verma Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 4 ++++ tools/testing/nvdimm/acpi_nfit_test.c | 8 ++++++++ tools/testing/nvdimm/device_dax_test.c | 8 ++++++++ tools/testing/nvdimm/libnvdimm_test.c | 8 ++++++++ tools/testing/nvdimm/pmem_test.c | 8 ++++++++ tools/testing/nvdimm/test/nfit.c | 6 ++++++ tools/testing/nvdimm/watermark.h | 21 +++++++++++++++++++++ 7 files changed, 63 insertions(+) create mode 100644 tools/testing/nvdimm/acpi_nfit_test.c create mode 100644 tools/testing/nvdimm/device_dax_test.c create mode 100644 tools/testing/nvdimm/libnvdimm_test.c create mode 100644 tools/testing/nvdimm/pmem_test.c create mode 100644 tools/testing/nvdimm/watermark.h (limited to 'tools') diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index db33b28c5ef3..0392153a0009 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o +nfit-y += acpi_nfit_test.o nfit-y += config_check.o nd_pmem-y := $(NVDIMM_SRC)/pmem.o nd_pmem-y += pmem-dax.o +nd_pmem-y += pmem_test.o nd_pmem-y += config_check.o nd_btt-y := $(NVDIMM_SRC)/btt.o @@ -57,6 +59,7 @@ dax-y += config_check.o device_dax-y := $(DAX_SRC)/device.o device_dax-y += dax-dev.o +device_dax-y += device_dax_test.o device_dax-y += config_check.o dax_pmem-y := $(DAX_SRC)/pmem.o @@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o +libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o obj-m += test/ diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c new file mode 100644 index 000000000000..43521512e577 --- /dev/null +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include +#include +#include "watermark.h" + +nfit_test_watermark(acpi_nfit); diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c new file mode 100644 index 000000000000..24b17bf42429 --- /dev/null +++ b/tools/testing/nvdimm/device_dax_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include +#include +#include "watermark.h" + +nfit_test_watermark(device_dax); diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c new file mode 100644 index 000000000000..00ca30b23932 --- /dev/null +++ b/tools/testing/nvdimm/libnvdimm_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include +#include +#include "watermark.h" + +nfit_test_watermark(libnvdimm); diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c new file mode 100644 index 000000000000..fd38f92275cf --- /dev/null +++ b/tools/testing/nvdimm/pmem_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include +#include +#include "watermark.h" + +nfit_test_watermark(pmem); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 42ee1798971d..450b4cbba6b6 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -27,6 +27,7 @@ #include #include #include "nfit_test.h" +#include "../watermark.h" /* * Generate an NFIT table to describe the following topology: @@ -2545,6 +2546,11 @@ static __init int nfit_test_init(void) { int rc, i; + pmem_test(); + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); + nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); nfit_wq = create_singlethread_workqueue("nfit"); diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h new file mode 100644 index 000000000000..ed0528757bd4 --- /dev/null +++ b/tools/testing/nvdimm/watermark.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. +#ifndef _TEST_NVDIMM_WATERMARK_H_ +#define _TEST_NVDIMM_WATERMARK_H_ +int pmem_test(void); +int libnvdimm_test(void); +int acpi_nfit_test(void); +int device_dax_test(void); + +/* + * dummy routine for nfit_test to validate it is linking to the properly + * mocked module and not the standard one from the base tree. + */ +#define nfit_test_watermark(x) \ +int x##_test(void) \ +{ \ + pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \ + return 0; \ +} \ +EXPORT_SYMBOL(x##_test) +#endif /* _TEST_NVDIMM_WATERMARK_H_ */ -- cgit v1.2.3 From dc2b9f19e3bdaa87a7c3d123b8bba8a42d96d942 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 30 Jan 2018 21:55:00 +0100 Subject: tools: add netlink.h and if_link.h in tools uapi The headers are necessary for libbpf compilation on system with older version of the headers. Signed-off-by: Eric Leblond Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/if_link.h | 943 +++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netlink.h | 251 ++++++++++ tools/lib/bpf/Makefile | 6 + 3 files changed, 1200 insertions(+) create mode 100644 tools/include/uapi/linux/if_link.h create mode 100644 tools/include/uapi/linux/netlink.h (limited to 'tools') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h new file mode 100644 index 000000000000..8616131e2c61 --- /dev/null +++ b/tools/include/uapi/linux/if_link.h @@ -0,0 +1,943 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IF_LINK_H +#define _UAPI_LINUX_IF_LINK_H + +#include +#include + +/* This struct should be in sync with struct rtnl_link_stats64 */ +struct rtnl_link_stats { + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ +}; + +/* The main device statistics structure */ +struct rtnl_link_stats64 { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ + __u64 multicast; /* multicast packets received */ + __u64 collisions; + + /* detailed rx_errors: */ + __u64 rx_length_errors; + __u64 rx_over_errors; /* receiver ring buff overflow */ + __u64 rx_crc_errors; /* recved pkt with crc error */ + __u64 rx_frame_errors; /* recv'd frame alignment error */ + __u64 rx_fifo_errors; /* recv'r fifo overrun */ + __u64 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u64 tx_aborted_errors; + __u64 tx_carrier_errors; + __u64 tx_fifo_errors; + __u64 tx_heartbeat_errors; + __u64 tx_window_errors; + + /* for cslip etc */ + __u64 rx_compressed; + __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap { + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + +enum { + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO + IFLA_NET_NS_PID, + IFLA_IFALIAS, + IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ + IFLA_VFINFO_LIST, + IFLA_STATS64, + IFLA_VF_PORTS, + IFLA_PORT_SELF, + IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ + IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ +#define IFLA_PROMISCUITY IFLA_PROMISCUITY + IFLA_NUM_TX_QUEUES, + IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, + IFLA_PHYS_PORT_ID, + IFLA_CARRIER_CHANGES, + IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, + IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, + IFLA_PAD, + IFLA_XDP, + IFLA_EVENT, + IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) +#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) +#endif + +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum { + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ + IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, +}; + +/* Bridge section */ + +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, + IFLA_BR_GROUP_FWD_MASK, + IFLA_BR_ROOT_ID, + IFLA_BR_BRIDGE_ID, + IFLA_BR_ROOT_PORT, + IFLA_BR_ROOT_PATH_COST, + IFLA_BR_TOPOLOGY_CHANGE, + IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + IFLA_BR_HELLO_TIMER, + IFLA_BR_TCN_TIMER, + IFLA_BR_TOPOLOGY_CHANGE_TIMER, + IFLA_BR_GC_TIMER, + IFLA_BR_GROUP_ADDR, + IFLA_BR_FDB_FLUSH, + IFLA_BR_MCAST_ROUTER, + IFLA_BR_MCAST_SNOOPING, + IFLA_BR_MCAST_QUERY_USE_IFADDR, + IFLA_BR_MCAST_QUERIER, + IFLA_BR_MCAST_HASH_ELASTICITY, + IFLA_BR_MCAST_HASH_MAX, + IFLA_BR_MCAST_LAST_MEMBER_CNT, + IFLA_BR_MCAST_STARTUP_QUERY_CNT, + IFLA_BR_MCAST_LAST_MEMBER_INTVL, + IFLA_BR_MCAST_MEMBERSHIP_INTVL, + IFLA_BR_MCAST_QUERIER_INTVL, + IFLA_BR_MCAST_QUERY_INTVL, + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + IFLA_BR_NF_CALL_IPTABLES, + IFLA_BR_NF_CALL_IP6TABLES, + IFLA_BR_NF_CALL_ARPTABLES, + IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) + +struct ifla_bridge_id { + __u8 prio[2]; + __u8 addr[6]; /* ETH_ALEN */ +}; + +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + IFLA_BRPORT_LEARNING, /* mac learning */ + IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ + IFLA_BRPORT_PROXYARP, /* proxy ARP */ + IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ + IFLA_BRPORT_ROOT_ID, /* designated root */ + IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ + IFLA_BRPORT_DESIGNATED_PORT, + IFLA_BRPORT_DESIGNATED_COST, + IFLA_BRPORT_ID, + IFLA_BRPORT_NO, + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + IFLA_BRPORT_CONFIG_PENDING, + IFLA_BRPORT_MESSAGE_AGE_TIMER, + IFLA_BRPORT_FORWARD_DELAY_TIMER, + IFLA_BRPORT_HOLD_TIMER, + IFLA_BRPORT_FLUSH, + IFLA_BRPORT_MULTICAST_ROUTER, + IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, + IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, + IFLA_BRPORT_NEIGH_SUPPRESS, + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + +struct ifla_cacheinfo { + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; + +enum { + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + IFLA_INFO_SLAVE_KIND, + IFLA_INFO_SLAVE_DATA, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + +/* VLAN section */ + +enum { + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum { + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping { + __u32 from; + __u32 to; +}; + +/* MACVLAN section */ +enum { + IFLA_MACVLAN_UNSPEC, + IFLA_MACVLAN_MODE, + IFLA_MACVLAN_FLAGS, + IFLA_MACVLAN_MACADDR_MODE, + IFLA_MACVLAN_MACADDR, + IFLA_MACVLAN_MACADDR_DATA, + IFLA_MACVLAN_MACADDR_COUNT, + __IFLA_MACVLAN_MAX, +}; + +#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) + +enum macvlan_mode { + MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ + MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ + MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ + MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ +}; + +enum macvlan_macaddr_mode { + MACVLAN_MACADDR_ADD, + MACVLAN_MACADDR_DEL, + MACVLAN_MACADDR_FLUSH, + MACVLAN_MACADDR_SET, +}; + +#define MACVLAN_FLAG_NOPROMISC 1 + +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + +/* MACSEC section */ +enum { + IFLA_MACSEC_UNSPEC, + IFLA_MACSEC_SCI, + IFLA_MACSEC_PORT, + IFLA_MACSEC_ICV_LEN, + IFLA_MACSEC_CIPHER_SUITE, + IFLA_MACSEC_WINDOW, + IFLA_MACSEC_ENCODING_SA, + IFLA_MACSEC_ENCRYPT, + IFLA_MACSEC_PROTECT, + IFLA_MACSEC_INC_SCI, + IFLA_MACSEC_ES, + IFLA_MACSEC_SCB, + IFLA_MACSEC_REPLAY_PROTECT, + IFLA_MACSEC_VALIDATION, + IFLA_MACSEC_PAD, + __IFLA_MACSEC_MAX, +}; + +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) + +enum macsec_validation_type { + MACSEC_VALIDATE_DISABLED = 0, + MACSEC_VALIDATE_CHECK = 1, + MACSEC_VALIDATE_STRICT = 2, + __MACSEC_VALIDATE_END, + MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, +}; + +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, + IPVLAN_MODE_MAX +}; + +#define IPVLAN_F_PRIVATE 0x01 +#define IPVLAN_F_VEPA 0x02 + +/* VXLAN section */ +enum { + IFLA_VXLAN_UNSPEC, + IFLA_VXLAN_ID, + IFLA_VXLAN_GROUP, /* group or remote address */ + IFLA_VXLAN_LINK, + IFLA_VXLAN_LOCAL, + IFLA_VXLAN_TTL, + IFLA_VXLAN_TOS, + IFLA_VXLAN_LEARNING, + IFLA_VXLAN_AGEING, + IFLA_VXLAN_LIMIT, + IFLA_VXLAN_PORT_RANGE, /* source port */ + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, + IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, + __IFLA_VXLAN_MAX +}; +#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) + +struct ifla_vxlan_port_range { + __be16 low; + __be16 high; +}; + +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, + IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + +/* PPP section */ +enum { + IFLA_PPP_UNSPEC, + IFLA_PPP_DEV_FD, + __IFLA_PPP_MAX +}; +#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) + +/* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + +enum { + IFLA_GTP_UNSPEC, + IFLA_GTP_FD0, + IFLA_GTP_FD1, + IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, + __IFLA_GTP_MAX, +}; +#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) + +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, + IFLA_BOND_TLB_DYNAMIC_LB, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + +enum { + IFLA_BOND_AD_INFO_UNSPEC, + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + +enum { + IFLA_BOND_SLAVE_UNSPEC, + IFLA_BOND_SLAVE_STATE, + IFLA_BOND_SLAVE_MII_STATUS, + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, + IFLA_BOND_SLAVE_PERM_HWADDR, + IFLA_BOND_SLAVE_QUEUE_ID, + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + __IFLA_BOND_SLAVE_MAX, +}; + +#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) + +/* SR-IOV virtual function management section */ + +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, /* VLAN ID and QoS */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ + IFLA_VF_STATS, /* network device statistics */ + IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + +struct ifla_vf_mac { + __u32 vf; + __u8 mac[32]; /* MAX_ADDR_LEN */ +}; + +struct ifla_vf_vlan { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; +}; + +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + +struct ifla_vf_tx_rate { + __u32 vf; + __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ +}; + +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + +struct ifla_vf_spoofchk { + __u32 vf; + __u32 setting; +}; + +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + +enum { + IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE, /* link always up */ + IFLA_VF_LINK_STATE_DISABLE, /* link always down */ + __IFLA_VF_LINK_STATE_MAX, +}; + +struct ifla_vf_link_state { + __u32 vf; + __u32 link_state; +}; + +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, + IFLA_VF_STATS_RX_DROPPED, + IFLA_VF_STATS_TX_DROPPED, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + +struct ifla_vf_trust { + __u32 vf; + __u32 setting; +}; + +/* VF ports management section + * + * Nested layout of set/get msg is: + * + * [IFLA_NUM_VF] + * [IFLA_VF_PORTS] + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * ... + * [IFLA_PORT_SELF] + * [IFLA_PORT_*], ... + */ + +enum { + IFLA_VF_PORT_UNSPEC, + IFLA_VF_PORT, /* nest */ + __IFLA_VF_PORT_MAX, +}; + +#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) + +enum { + IFLA_PORT_UNSPEC, + IFLA_PORT_VF, /* __u32 */ + IFLA_PORT_PROFILE, /* string */ + IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ + IFLA_PORT_INSTANCE_UUID, /* binary UUID */ + IFLA_PORT_HOST_UUID, /* binary UUID */ + IFLA_PORT_REQUEST, /* __u8 */ + IFLA_PORT_RESPONSE, /* __u16, output only */ + __IFLA_PORT_MAX, +}; + +#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) + +#define PORT_PROFILE_MAX 40 +#define PORT_UUID_MAX 16 +#define PORT_SELF_VF -1 + +enum { + PORT_REQUEST_PREASSOCIATE = 0, + PORT_REQUEST_PREASSOCIATE_RR, + PORT_REQUEST_ASSOCIATE, + PORT_REQUEST_DISASSOCIATE, +}; + +enum { + PORT_VDP_RESPONSE_SUCCESS = 0, + PORT_VDP_RESPONSE_INVALID_FORMAT, + PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_VDP_RESPONSE_UNUSED_VTID, + PORT_VDP_RESPONSE_VTID_VIOLATION, + PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, + PORT_VDP_RESPONSE_OUT_OF_SYNC, + /* 0x08-0xFF reserved for future VDP use */ + PORT_PROFILE_RESPONSE_SUCCESS = 0x100, + PORT_PROFILE_RESPONSE_INPROGRESS, + PORT_PROFILE_RESPONSE_INVALID, + PORT_PROFILE_RESPONSE_BADSTATE, + PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_PROFILE_RESPONSE_ERROR, +}; + +struct ifla_port_vsi { + __u8 vsi_mgr_id; + __u8 vsi_type_id[3]; + __u8 vsi_type_version; + __u8 pad[3]; +}; + + +/* IPoIB section */ + +enum { + IFLA_IPOIB_UNSPEC, + IFLA_IPOIB_PKEY, + IFLA_IPOIB_MODE, + IFLA_IPOIB_UMCAST, + __IFLA_IPOIB_MAX +}; + +enum { + IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ + IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ +}; + +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + + +/* HSR section */ + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, + IFLA_HSR_VERSION, /* HSR version */ + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + +/* STATS section */ + +struct if_stats_msg { + __u8 family; + __u8 pad1; + __u16 pad2; + __u32 ifindex; + __u32 filter_mask; +}; + +/* A stats attribute can be netdev specific or a global stat. + * For netdev stats, lets use the prefix IFLA_STATS_LINK_* + */ +enum { + IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ + IFLA_STATS_LINK_64, + IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, + IFLA_STATS_AF_SPEC, + __IFLA_STATS_MAX, +}; + +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) + +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) + +/* These are embedded into IFLA_STATS_LINK_XSTATS: + * [IFLA_STATS_LINK_XSTATS] + * -> [LINK_XSTATS_TYPE_xxx] + * -> [rtnl link type specific attributes] + */ +enum { + LINK_XSTATS_TYPE_UNSPEC, + LINK_XSTATS_TYPE_BRIDGE, + __LINK_XSTATS_TYPE_MAX +}; +#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) + +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + +/* XDP section */ + +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE | \ + XDP_FLAGS_HW_MODE) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_MODES) + +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, +}; + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + +enum { + IFLA_EVENT_NONE, + IFLA_EVENT_REBOOT, /* internal reset / reboot */ + IFLA_EVENT_FEATURES, /* change in offload features */ + IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ + IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ + IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ +}; + +#endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h new file mode 100644 index 000000000000..776bc92e9118 --- /dev/null +++ b/tools/include/uapi/linux/netlink.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETLINK_H +#define _UAPI__LINUX_NETLINK_H + +#include +#include /* for __kernel_sa_family_t */ +#include + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_UNUSED 1 /* Unused number */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ +#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ +#define NETLINK_XFRM 6 /* ipsec */ +#define NETLINK_SELINUX 7 /* SELinux event notifications */ +#define NETLINK_ISCSI 8 /* Open-iSCSI */ +#define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ +#define NETLINK_IP6_FW 13 +#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ +#define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ + +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + +#define MAX_LINKS 32 + +struct sockaddr_nl { + __kernel_sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __u32 nl_pid; /* port ID */ + __u32 nl_groups; /* multicast groups mask */ +}; + +struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sending process port ID */ +}; + +/* Flags values */ + +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ +#define NLM_F_APPEND 0x800 /* Add to end of list */ + +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4U +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) +#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) + +#define NLMSG_NOOP 0x1 /* Nothing. */ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +struct nlmsgerr { + int error; + struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 +}; + +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#endif +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 + +struct nl_pktinfo { + __u32 group; +}; + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +#ifndef __KERNEL__ +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr { + __u16 nla_len; + __u16 nla_type; +}; + +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; + +#endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 83714ca1f22b..e6d5f8d1477f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -160,6 +160,12 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ + (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ + (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) -- cgit v1.2.3 From 949abbe88436c000cc63fce2bdfeb48b7d06a7df Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 30 Jan 2018 21:55:01 +0100 Subject: libbpf: add function to setup XDP Most of the code is taken from set_link_xdp_fd() in bpf_load.c and slightly modified to be library compliant. Signed-off-by: Eric Leblond Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 2 + tools/lib/bpf/libbpf.h | 4 ++ 3 files changed, 128 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5128677e4117..bf2772566240 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -25,6 +25,12 @@ #include #include #include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" +#include +#include +#include +#include /* * When building perf, unistd.h is overridden. __NR_bpf is @@ -46,7 +52,9 @@ # endif #endif +#ifndef min #define min(x, y) ((x) < (y) ? (x) : (y)) +#endif static inline __u64 ptr_to_u64(const void *ptr) { @@ -413,3 +421,117 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) return err; } + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + socklen_t addrlen; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + return -errno; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != sa.nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + goto cleanup; + case NLMSG_DONE: + break; + default: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 30c776375118..c60122d3ea85 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -106,6 +106,8 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", }; int libbpf_strerror(int err, char *buf, size_t size) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6e20003109e0..e42f96900318 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -42,6 +42,8 @@ enum libbpf_errno { LIBBPF_ERRNO__PROG2BIG, /* Program too big */ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ + LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ + LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ __LIBBPF_ERRNO__END, }; @@ -246,4 +248,6 @@ long libbpf_get_error(const void *ptr); int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif -- cgit v1.2.3 From bbf48c18ee0cd18b53712aa09aefa29b64b3976e Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 30 Jan 2018 21:55:02 +0100 Subject: libbpf: add error reporting in XDP Parse netlink ext attribute to get the error message returned by the card. Code is partially take from libnl. We add netlink.h to the uapi include of tools. And we need to avoid include of userspace netlink header to have a successful build of sample so nlattr.h has a define to avoid the inclusion. Using a direct define could have been an issue as NLMSGERR_ATTR_MAX can change in the future. We also define SOL_NETLINK if not defined to avoid to have to copy socket.h for a fixed value. Signed-off-by: Eric Leblond Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/bpf.c | 11 +++ tools/lib/bpf/nlattr.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/nlattr.h | 72 +++++++++++++++++++ 4 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 tools/lib/bpf/nlattr.c create mode 100644 tools/lib/bpf/nlattr.h (limited to 'tools') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index d8749756352d..64c679d67109 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o +libbpf-y := libbpf.o bpf.o nlattr.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index bf2772566240..9c88f6e4156d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,6 +32,10 @@ #include #include +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -436,6 +440,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) struct nlmsghdr *nh; struct nlmsgerr *err; socklen_t addrlen; + int one = 1; memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; @@ -445,6 +450,11 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) return -errno; } + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ret = -errno; goto cleanup; @@ -521,6 +531,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) if (!err->error) continue; ret = err->error; + nla_dump_errormsg(nh); goto cleanup; case NLMSG_DONE: break; diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c new file mode 100644 index 000000000000..4719434278b2 --- /dev/null +++ b/tools/lib/bpf/nlattr.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf + */ + +#include +#include "nlattr.h" +#include +#include +#include + +static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(uint8_t), + [NLA_U16] = sizeof(uint16_t), + [NLA_U32] = sizeof(uint32_t), + [NLA_U64] = sizeof(uint64_t), + [NLA_STRING] = 1, + [NLA_FLAG] = 0, +}; + +static int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +static int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +static void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + +static int validate_nla(struct nlattr *nla, int maxtype, + struct nla_policy *policy) +{ + struct nla_policy *pt; + unsigned int minlen = 0; + int type = nla_type(nla); + + if (type < 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + if (pt->type > NLA_TYPE_MAX) + return 0; + + if (pt->minlen) + minlen = pt->minlen; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (nla_len(nla) < minlen) + return -1; + + if (pt->maxlen && nla_len(nla) > pt->maxlen) + return -1; + + if (pt->type == NLA_STRING) { + char *data = nla_data(nla); + if (data[nla_len(nla) - 1] != '\0') + return -1; + } + + return 0; +} + +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * Create attribute index based on a stream of attributes. + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg head Head of attribute stream. + * @arg len Length of attribute stream. + * @arg policy Attribute validation policy. + * + * Iterates over the stream of attributes and stores a pointer to each + * attribute in the index array using the attribute type as index to + * the array. Attribute with a type greater than the maximum type + * specified will be silently ignored in order to maintain backwards + * compatibility. If \a policy is not NULL, the attribute will be + * validated using the specified policy. + * + * @see nla_validate + * @return 0 on success or a negative error code. + */ +static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + int type = nla_type(nla); + + if (type > maxtype) + continue; + + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + if (tb[type]) + fprintf(stderr, "Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); + + tb[type] = nla; + } + + err = 0; +errout: + return err; +} + +/* dump netlink extended ack error message */ +int nla_dump_errormsg(struct nlmsghdr *nlh) +{ + struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING }, + [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 }, + }; + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; + struct nlmsgerr *err; + char *errmsg = NULL; + int hlen, alen; + + /* no TLVs, nothing to do here */ + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + hlen = sizeof(*err); + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += nlmsg_len(&err->msg); + + attr = (struct nlattr *) ((void *) err + hlen); + alen = nlh->nlmsg_len - hlen; + + if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { + fprintf(stderr, + "Failed to parse extended error attributes\n"); + return 0; + } + + if (tb[NLMSGERR_ATTR_MSG]) + errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]); + + fprintf(stderr, "Kernel error message: %s\n", errmsg); + + return 0; +} diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h new file mode 100644 index 000000000000..931a71f68f93 --- /dev/null +++ b/tools/lib/bpf/nlattr.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf + */ + +#ifndef __NLATTR_H +#define __NLATTR_H + +#include +#include +/* avoid multiple definition of netlink features */ +#define __LINUX_NETLINK_H + +/** + * Standard attribute types to specify validation policy + */ +enum { + NLA_UNSPEC, /**< Unspecified type, binary data chunk */ + NLA_U8, /**< 8 bit integer */ + NLA_U16, /**< 16 bit integer */ + NLA_U32, /**< 32 bit integer */ + NLA_U64, /**< 64 bit integer */ + NLA_STRING, /**< NUL terminated character string */ + NLA_FLAG, /**< Flag */ + NLA_MSECS, /**< Micro seconds (64bit) */ + NLA_NESTED, /**< Nested attributes */ + __NLA_TYPE_MAX, +}; + +#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) + +/** + * @ingroup attr + * Attribute validation policy. + * + * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. + */ +struct nla_policy { + /** Type of attribute or NLA_UNSPEC */ + uint16_t type; + + /** Minimal length of payload required */ + uint16_t minlen; + + /** Maximal length of payload allowed */ + uint16_t maxlen; +}; + +/** + * @ingroup attr + * Iterate over a stream of attributes + * @arg pos loop counter, set to current attribute + * @arg head head of attribute stream + * @arg len length of attribute stream + * @arg rem initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +int nla_dump_errormsg(struct nlmsghdr *nlh); + +#endif /* __NLATTR_H */ -- cgit v1.2.3 From 6061a3d6720600c976b877c3ac1402b3ef0a8a55 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 30 Jan 2018 21:55:03 +0100 Subject: libbpf: add missing SPDX-License-Identifier Signed-off-by: Eric Leblond Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 2 ++ tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/libbpf.c | 2 ++ tools/lib/bpf/libbpf.h | 2 ++ 4 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9c88f6e4156d..592a58a2b681 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1 + /* * common eBPF ELF operations. * diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 9f44c196931e..8d18fb73d7fb 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + /* * common eBPF ELF operations. * diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c60122d3ea85..71ddc481f349 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1 + /* * Common eBPF ELF object loading operations. * diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e42f96900318..f85906533cdd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + /* * Common eBPF ELF object loading operations. * -- cgit v1.2.3 From 667ca1ec7c9eb7ac3b80590b6597151b4c2a750b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Jan 2018 15:20:10 -0500 Subject: membarrier/selftest: Test private expedited command Test the new MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED commands. Add checks expecting specific error values on system calls expected to fail. Signed-off-by: Mathieu Desnoyers Acked-by: Thomas Gleixner Acked-by: Shuah Khan Acked-by: Greg Kroah-Hartman Acked-by: Peter Zijlstra (Intel) Cc: Alan Stern Cc: Alice Ferrazzi Cc: Andrea Parri Cc: Andrew Hunter Cc: Andy Lutomirski Cc: Avi Kivity Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Dave Watson Cc: David Sehr Cc: Greg Hackmann Cc: Linus Torvalds Cc: Maged Michael Cc: Michael Ellerman Cc: Paul E. McKenney Cc: Paul Elder Cc: Paul Mackerras Cc: Russell King Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20180129202020.8515-2-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- .../testing/selftests/membarrier/membarrier_test.c | 111 ++++++++++++++++++--- 1 file changed, 95 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 9e674d9514d1..e6ee73d01fa1 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -16,49 +16,119 @@ static int sys_membarrier(int cmd, int flags) static int test_membarrier_cmd_fail(void) { int cmd = -1, flags = 0; + const char *test_name = "sys membarrier invalid command"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier invalid command test: command = %d, flags = %d. Should fail, but passed\n", - cmd, flags); + "%s test: command = %d, flags = %d. Should fail, but passed\n", + test_name, cmd, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); } ksft_test_result_pass( - "sys membarrier invalid command test: command = %d, flags = %d. Failed as expected\n", - cmd, flags); + "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n", + test_name, cmd, flags, errno); return 0; } static int test_membarrier_flags_fail(void) { int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Should fail, but passed\n", - flags); + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Failed as expected\n", - flags); + "%s test: flags = %d, errno = %d. Failed as expected\n", + test_name, flags, errno); return 0; } -static int test_membarrier_success(void) +static int test_membarrier_shared_success(void) { int cmd = MEMBARRIER_CMD_SHARED, flags = 0; - const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED\n"; + const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED"; if (sys_membarrier(cmd, flags) != 0) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); return 0; } @@ -72,7 +142,16 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_success(); + status = test_membarrier_shared_success(); + if (status) + return status; + status = test_membarrier_private_expedited_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_success(); + if (status) + return status; + status = test_membarrier_private_expedited_success(); if (status) return status; return 0; @@ -108,5 +187,5 @@ int main(int argc, char **argv) test_membarrier_query(); test_membarrier(); - ksft_exit_pass(); + return ksft_exit_pass(); } -- cgit v1.2.3 From 92485487ba834f6665ec13dfb2c69e80cd0c7c37 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Jan 2018 15:20:14 -0500 Subject: membarrier/selftest: Test global expedited command Test the new MEMBARRIER_CMD_GLOBAL_EXPEDITED and MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED commands. Adapt to the MEMBARRIER_CMD_SHARED -> MEMBARRIER_CMD_GLOBAL rename. Signed-off-by: Mathieu Desnoyers Acked-by: Thomas Gleixner Acked-by: Shuah Khan Acked-by: Peter Zijlstra (Intel) Cc: Alan Stern Cc: Alice Ferrazzi Cc: Andrea Parri Cc: Andrew Hunter Cc: Andy Lutomirski Cc: Avi Kivity Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Dave Watson Cc: David Sehr Cc: Greg Hackmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Maged Michael Cc: Michael Ellerman Cc: Paul E. McKenney Cc: Paul Elder Cc: Paul Mackerras Cc: Russell King Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20180129202020.8515-6-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- .../testing/selftests/membarrier/membarrier_test.c | 59 ++++++++++++++++++++-- 1 file changed, 54 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index e6ee73d01fa1..eec39a0e1f77 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -59,10 +59,10 @@ static int test_membarrier_flags_fail(void) return 0; } -static int test_membarrier_shared_success(void) +static int test_membarrier_global_success(void) { - int cmd = MEMBARRIER_CMD_SHARED, flags = 0; - const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED"; + int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL"; if (sys_membarrier(cmd, flags) != 0) { ksft_exit_fail_msg( @@ -132,6 +132,40 @@ static int test_membarrier_private_expedited_success(void) return 0; } +static int test_membarrier_register_global_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_global_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + static int test_membarrier(void) { int status; @@ -142,7 +176,7 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_shared_success(); + status = test_membarrier_global_success(); if (status) return status; status = test_membarrier_private_expedited_fail(); @@ -152,6 +186,19 @@ static int test_membarrier(void) if (status) return status; status = test_membarrier_private_expedited_success(); + if (status) + return status; + /* + * It is valid to send a global membarrier from a non-registered + * process. + */ + status = test_membarrier_global_expedited_success(); + if (status) + return status; + status = test_membarrier_register_global_expedited_success(); + if (status) + return status; + status = test_membarrier_global_expedited_success(); if (status) return status; return 0; @@ -173,8 +220,10 @@ static int test_membarrier_query(void) } ksft_exit_fail_msg("sys_membarrier() failed\n"); } - if (!(ret & MEMBARRIER_CMD_SHARED)) + if (!(ret & MEMBARRIER_CMD_GLOBAL)) { + ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); ksft_exit_fail_msg("sys_membarrier is not supported.\n"); + } ksft_test_result_pass("sys_membarrier available\n"); return 0; -- cgit v1.2.3 From 460e8c3340a265d1d70fa1ee05c42afd68b2efa0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Jan 2018 15:20:20 -0500 Subject: membarrier/selftest: Test private expedited sync core command Test the new MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE and MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE commands. Signed-off-by: Mathieu Desnoyers Acked-by: Thomas Gleixner Acked-by: Shuah Khan Acked-by: Peter Zijlstra (Intel) Cc: Alan Stern Cc: Alice Ferrazzi Cc: Andrea Parri Cc: Andrew Hunter Cc: Andy Lutomirski Cc: Avi Kivity Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Dave Watson Cc: David Sehr Cc: Greg Hackmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Maged Michael Cc: Michael Ellerman Cc: Paul E. McKenney Cc: Paul Elder Cc: Paul Mackerras Cc: Russell King Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20180129202020.8515-12-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- .../testing/selftests/membarrier/membarrier_test.c | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index eec39a0e1f77..22bffd55a523 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -132,6 +132,63 @@ static int test_membarrier_private_expedited_success(void) return 0; } +static int test_membarrier_private_expedited_sync_core_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + static int test_membarrier_register_global_expedited_success(void) { int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0; @@ -188,6 +245,22 @@ static int test_membarrier(void) status = test_membarrier_private_expedited_success(); if (status) return status; + status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0); + if (status < 0) { + ksft_test_result_fail("sys_membarrier() failed\n"); + return status; + } + if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { + status = test_membarrier_private_expedited_sync_core_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_sync_core_success(); + if (status) + return status; + status = test_membarrier_private_expedited_sync_core_success(); + if (status) + return status; + } /* * It is valid to send a global membarrier from a non-registered * process. -- cgit v1.2.3 From da6f8320d58623eae9b6fa2f09b1b4f60a772ce9 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 4 Jan 2018 10:06:38 -0800 Subject: ACPICA: All acpica: Update copyrights to 2018 including tool signons. Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/common/cmfsize.c | 2 +- tools/power/acpi/common/getopt.c | 2 +- tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 +- tools/power/acpi/os_specific/service_layers/osunixdir.c | 2 +- tools/power/acpi/os_specific/service_layers/osunixmap.c | 2 +- tools/power/acpi/os_specific/service_layers/osunixxf.c | 2 +- tools/power/acpi/tools/acpidump/acpidump.h | 2 +- tools/power/acpi/tools/acpidump/apdump.c | 2 +- tools/power/acpi/tools/acpidump/apfiles.c | 2 +- tools/power/acpi/tools/acpidump/apmain.c | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 5b38dc2fec4f..dfa6fee1b915 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 6e78413bb2cb..f7032c9ab35e 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 52a39ecf5ca1..e7347edfd4f9 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index ea14eaeb268f..9b5e61b636d9 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index cf9b5a54df92..8b26924e2602 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 025c1b07049d..34c044da433c 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index d6aa40fce2b1..3c679607d1c3 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 0634449156d8..9ad1712e4cf9 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index d686e11936c4..856e1b83407b 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index be418fba9441..f4ef826800cf 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without -- cgit v1.2.3 From 7b4eb53d95c30bdc55c44647c6968f24227fd1ba Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 5 Feb 2018 16:25:24 -0800 Subject: tools/bpf: fix batch-mode test failure of test_xdp_redirect.sh The tests at tools/testing/selftests/bpf can run in patch mode, e.g., make -C tools/testing/selftests/bpf run_tests With the batch mode, I experimented intermittent test failure of test_xdp_redirect.sh. .... selftests: test_xdp_redirect [PASS] selftests: test_xdp_redirect.sh [PASS] RTNETLINK answers: File exists selftests: test_xdp_meta [FAILED] selftests: test_xdp_meta.sh [FAIL] .... The following illustrates what caused the failure: (1). test_xdp_redirect creates veth pairs (veth1,veth11) and (veth2,veth22), and assign veth11 and veth22 to namespace ns1 and ns2 respectively. (2). at the end of test_xdp_redirect test, ns1 and ns2 are deleted. During this process, the deletion of actual namespace resources, including deletion of veth1{1} and veth2{2}, is put into a workqueue to be processed asynchronously. (3). test_xdp_meta tries to create veth pair (veth1, veth2). The previous veth deletions in step (2) have not finished yet, and veth1 or veth2 may be still valid in the kernel, thus causing the failure. The fix is to explicitly delete the veth pair before test_xdp_redirect exits. Only one end of veth needs deletion as the kernel will delete the other end automatically. Also test_xdp_meta is also fixed in similar manner to avoid future potential issues. Fixes: 996139e801fd ("selftests: bpf: add a test for XDP redirect") Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_xdp_meta.sh | 1 + tools/testing/selftests/bpf/test_xdp_redirect.sh | 2 ++ 2 files changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 307aa856cee3..637fcf4fe4e3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -9,6 +9,7 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 344a3656dea6..c4b17e08d431 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -19,6 +19,8 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null + ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } -- cgit v1.2.3 From 051803c0d01f4d8a197ecc08bfeffdff00d86c62 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 23:06:32 -0400 Subject: radix tree test suite: Remove ARRAY_SIZE This is now defined in tools/include/linux/kernel.h, so our definition generates a warning. Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/linux/kernel.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index c3bc3f364f68..426f32f28547 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -17,6 +17,4 @@ #define pr_debug printk #define pr_cont printk -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - #endif /* _KERNEL_H */ -- cgit v1.2.3 From 490645d027c5925b30c88b9c7a663850a641d15d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 9 Nov 2017 20:15:14 -0500 Subject: idr test suite: Fix ida_test_random() The test was checking the wrong errno; ida_get_new_above() returns EAGAIN, not ENOMEM on memory allocation failure. Double the number of threads to increase the chance that we actually exercise this path during the test suite (it was a bit sporadic before). Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 30cd0b296f1a..193450b29bf0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -380,7 +380,7 @@ void ida_check_random(void) do { ida_pre_get(&ida, GFP_KERNEL); err = ida_get_new_above(&ida, bit, &id); - } while (err == -ENOMEM); + } while (err == -EAGAIN); assert(!err); assert(id == bit); } @@ -489,7 +489,7 @@ static void *ida_random_fn(void *arg) void ida_thread_tests(void) { - pthread_t threads[10]; + pthread_t threads[20]; int i; for (i = 0; i < ARRAY_SIZE(threads); i++) -- cgit v1.2.3 From 6e6d301490936789ff57daaaaf63f44d928a4028 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 14:27:14 -0500 Subject: IDR test suite: Check handling negative end correctly One of the charming quirks of the idr_alloc() interface is that you can pass a negative end and it will be interpreted as "maximum". Ensure we don't break that. Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 193450b29bf0..892ef8855b02 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -207,6 +207,7 @@ void idr_checks(void) assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); } assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC); + assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC); idr_for_each(&idr, item_idr_free, &idr); idr_destroy(&idr); -- cgit v1.2.3 From 460488c58ca8b9167463ac22ec9a2e33db351962 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 15:16:24 -0500 Subject: idr: Remove idr_alloc_ext It has no more users, so remove it. Move idr_alloc() back into idr.c, move the guts of idr_alloc_cmn() into idr_alloc_u32(), remove the wrappers around idr_get_free_cmn() and rename it to idr_get_free(). While there is now no interface to allocate IDs larger than a u32, the IDR internals remain ready to handle a larger ID should a need arise. These changes make it possible to provide the guarantee that, if the nextid pointer points into the object, the object's ID will be initialised before a concurrent lookup can find the object. Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 892ef8855b02..36437ade429c 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -215,6 +215,23 @@ void idr_checks(void) assert(idr_is_empty(&idr)); + idr_set_cursor(&idr, INT_MAX - 3UL); + for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) { + struct item *item; + unsigned int id; + if (i <= INT_MAX) + item = item_create(i, 0); + else + item = item_create(i - INT_MAX - 1, 0); + + id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL); + assert(id == item->index); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + for (i = 1; i < 10000; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i); -- cgit v1.2.3 From 6ce711f2750031d12cec91384ac5cfa0a485b60a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 30 Nov 2017 13:45:11 -0500 Subject: idr: Make 1-based IDRs more efficient About 20% of the IDR users in the kernel want the allocated IDs to start at 1. The implementation currently searches all the way down the left hand side of the tree, finds no free ID other than ID 0, walks all the way back up, and then all the way down again. This patch 'rebases' the ID so we fill the entire radix tree, rather than leave a gap at 0. Chris Wilson says: "I did the quick hack of allocating index 0 of the idr and that eradicated idr_get_free() from being at the top of the profiles for the many-object stress tests. This improvement will be much appreciated." Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 36437ade429c..44ef9eba5a7a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,11 +153,12 @@ void idr_nowait_test(void) idr_destroy(&idr); } -void idr_get_next_test(void) +void idr_get_next_test(int base) { unsigned long i; int nextid; DEFINE_IDR(idr); + idr_init_base(&idr, base); int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; @@ -244,7 +245,9 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); - idr_get_next_test(); + idr_get_next_test(0); + idr_get_next_test(1); + idr_get_next_test(4); } /* -- cgit v1.2.3 From ad343a98e74e85aa91d844310e797f96fee6983b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 6 Feb 2018 15:37:52 -0800 Subject: tools/lib/subcmd/pager.c: do not alias select() params Use a separate fd set for select()-s exception fds param to fix the following gcc warning: pager.c:36:12: error: passing argument 2 to restrict-qualified parameter aliases with argument 4 [-Werror=restrict] select(1, &in, NULL, &in, NULL); ^~~ ~~~ Link: http://lkml.kernel.org/r/20180101105626.7168-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/lib/subcmd/pager.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c index 5ba754d17952..9997a8805a82 100644 --- a/tools/lib/subcmd/pager.c +++ b/tools/lib/subcmd/pager.c @@ -30,10 +30,13 @@ static void pager_preexec(void) * have real input */ fd_set in; + fd_set exception; FD_ZERO(&in); + FD_ZERO(&exception); FD_SET(0, &in); - select(1, &in, NULL, &in, NULL); + FD_SET(0, &exception); + select(1, &in, NULL, &exception, NULL); setenv("LESS", "FRSX", 0); } -- cgit v1.2.3 From 0ade34c37012ea5c516d9aa4d19a56e9f40a55ed Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 6 Feb 2018 15:38:34 -0800 Subject: lib: optimize cpumask_next_and() We've measured that we spend ~0.6% of sys cpu time in cpumask_next_and(). It's essentially a joined iteration in search for a non-zero bit, which is currently implemented as a lookup join (find a nonzero bit on the lhs, lookup the rhs to see if it's set there). Implement a direct join (find a nonzero bit on the incrementally built join). Also add generic bitmap benchmarks in the new `test_find_bit` module for new function (see `find_next_and_bit` in [2] and [3] below). For cpumask_next_and, direct benchmarking shows that it's 1.17x to 14x faster with a geometric mean of 2.1 on 32 CPUs [1]. No impact on memory usage. Note that on Arm, the new pure-C implementation still outperforms the old one that uses a mix of C and asm (`find_next_bit`) [3]. [1] Approximate benchmark code: ``` unsigned long src1p[nr_cpumask_longs] = {pattern1}; unsigned long src2p[nr_cpumask_longs] = {pattern2}; for (/*a bunch of repetitions*/) { for (int n = -1; n <= nr_cpu_ids; ++n) { asm volatile("" : "+rm"(src1p)); // prevent any optimization asm volatile("" : "+rm"(src2p)); unsigned long result = cpumask_next_and(n, src1p, src2p); asm volatile("" : "+rm"(result)); } } ``` Results: pattern1 pattern2 time_before/time_after 0x0000ffff 0x0000ffff 1.65 0x0000ffff 0x00005555 2.24 0x0000ffff 0x00001111 2.94 0x0000ffff 0x00000000 14.0 0x00005555 0x0000ffff 1.67 0x00005555 0x00005555 1.71 0x00005555 0x00001111 1.90 0x00005555 0x00000000 6.58 0x00001111 0x0000ffff 1.46 0x00001111 0x00005555 1.49 0x00001111 0x00001111 1.45 0x00001111 0x00000000 3.10 0x00000000 0x0000ffff 1.18 0x00000000 0x00005555 1.18 0x00000000 0x00001111 1.17 0x00000000 0x00000000 1.25 ----------------------------- geo.mean 2.06 [2] test_find_next_bit, X86 (skylake) [ 3913.477422] Start testing find_bit() with random-filled bitmap [ 3913.477847] find_next_bit: 160868 cycles, 16484 iterations [ 3913.477933] find_next_zero_bit: 169542 cycles, 16285 iterations [ 3913.478036] find_last_bit: 201638 cycles, 16483 iterations [ 3913.480214] find_first_bit: 4353244 cycles, 16484 iterations [ 3913.480216] Start testing find_next_and_bit() with random-filled bitmap [ 3913.481074] find_next_and_bit: 89604 cycles, 8216 iterations [ 3913.481075] Start testing find_bit() with sparse bitmap [ 3913.481078] find_next_bit: 2536 cycles, 66 iterations [ 3913.481252] find_next_zero_bit: 344404 cycles, 32703 iterations [ 3913.481255] find_last_bit: 2006 cycles, 66 iterations [ 3913.481265] find_first_bit: 17488 cycles, 66 iterations [ 3913.481266] Start testing find_next_and_bit() with sparse bitmap [ 3913.481272] find_next_and_bit: 764 cycles, 1 iterations [3] test_find_next_bit, arm (v7 odroid XU3). [ 267.206928] Start testing find_bit() with random-filled bitmap [ 267.214752] find_next_bit: 4474 cycles, 16419 iterations [ 267.221850] find_next_zero_bit: 5976 cycles, 16350 iterations [ 267.229294] find_last_bit: 4209 cycles, 16419 iterations [ 267.279131] find_first_bit: 1032991 cycles, 16420 iterations [ 267.286265] Start testing find_next_and_bit() with random-filled bitmap [ 267.302386] find_next_and_bit: 2290 cycles, 8140 iterations [ 267.309422] Start testing find_bit() with sparse bitmap [ 267.316054] find_next_bit: 191 cycles, 66 iterations [ 267.322726] find_next_zero_bit: 8758 cycles, 32703 iterations [ 267.329803] find_last_bit: 84 cycles, 66 iterations [ 267.336169] find_first_bit: 4118 cycles, 66 iterations [ 267.342627] Start testing find_next_and_bit() with sparse bitmap [ 267.356919] find_next_and_bit: 91 cycles, 1 iterations [courbet@google.com: v6] Link: http://lkml.kernel.org/r/20171129095715.23430-1-courbet@google.com [geert@linux-m68k.org: m68k/bitops: always include ] Link: http://lkml.kernel.org/r/1512556816-28627-1-git-send-email-geert@linux-m68k.org Link: http://lkml.kernel.org/r/20171128131334.23491-1-courbet@google.com Signed-off-by: Clement Courbet Signed-off-by: Geert Uytterhoeven Cc: Yury Norov Cc: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/asm-generic/bitops/find.h | 16 ++++++++++++++ tools/lib/find_bit.c | 39 ++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 9311fadaaab2..16ed1982cb34 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 42c15f906aac..a88bd507091e 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -22,22 +22,29 @@ #include #include -#if !defined(find_next_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -48,7 +55,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -62,7 +72,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } #endif @@ -104,6 +114,15 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); +} +#endif + +#ifndef find_next_and_bit +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); } #endif -- cgit v1.2.3 From 035d808f7cfd578dc210b584beb17e365f34d39a Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 7 Feb 2018 23:45:34 +0530 Subject: selftests: bpf: test_kmod.sh: check the module path before insmod test_kmod.sh reported false failure when module not present. Check test_bpf.ko is present in the path before loading it. Two cases to be addressed here, In the development process of test_bpf.c unit testing will be done by developers by using "insmod $SRC_TREE/lib/test_bpf.ko" On the other hand testers run full tests by installing modules on device under test (DUT) and followed by modprobe to insert the modules accordingly. Signed-off-by: Naresh Kamboju Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_kmod.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index ed4774d8d6ed..35669ccd4d23 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -10,9 +10,21 @@ test_run() echo "[ JIT enabled:$1 hardened:$2 ]" dmesg -C - insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null - if [ $? -ne 0 ]; then - rc=1 + if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then + insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + else + # Use modprobe dry run to check for missing test_bpf module + if ! /sbin/modprobe -q -n test_bpf; then + echo "test_bpf: [SKIP]" + elif /sbin/modprobe -q test_bpf; then + echo "test_bpf: ok" + else + echo "test_bpf: [FAIL]" + rc=1 + fi fi rmmod test_bpf 2> /dev/null dmesg | grep FAIL -- cgit v1.2.3 From 0badd331491097cc3702d05a6dd0264a434712b2 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:13 -0800 Subject: libbpf: complete list of strings for guessing program type It seems that the type guessing feature for libbpf, based on the name of the ELF section the program is located in, was inspired from samples/bpf/prog_load.c, which was not used by any sample for loading programs of certain types such as TC actions and classifiers, or LWT-related types. As a consequence, libbpf is not able to guess the type of such programs and to load them automatically if type is not provided to the `bpf_load_prog()` function. Add ELF section names associated to those eBPF program types so that they can be loaded with e.g. bpftool as well. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 71ddc481f349..c64840365433 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1816,12 +1816,17 @@ static const struct { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), }; -- cgit v1.2.3 From 92426820f7616caebdf6ba665f749102a670b3d4 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:14 -0800 Subject: tools: bpftool: exit doc Makefile early if rst2man is not available If rst2man is not available on the system, running `make doc` from the bpftool directory fails with an error message. However, it creates empty manual pages (.8 files in this case). A subsequent call to `make doc-install` would then succeed and install those empty man pages on the system. To prevent this, raise a Makefile error and exit immediately if rst2man is not available before generating the pages from the rst documentation. Fixes: ff69c21a85a4 ("tools: bpftool: add documentation") Reported-by: Jason van Aaardt Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index c462a928e03d..a9d47c1558bb 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -23,7 +23,12 @@ DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) man: man8 man8: $(DOC_MAN8) +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) + $(OUTPUT)%.8: %.rst +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif $(QUIET_GEN)rst2man $< > $@ clean: -- cgit v1.2.3 From 2148481d5d3111e5e9aa5adc1905fa3539e548c8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:15 -0800 Subject: tools: bpftool: make syntax for program map update explicit in man page Specify in the documentation that when using bpftool to update a map of type BPF_MAP_TYPE_PROG_ARRAY, the syntax for the program used as a value should use the "id|tag|pinned" keywords convention, as used with "bpftool prog" commands. Fixes: ff69c21a85a4 ("tools: bpftool: add documentation") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 0ab32b312aec..457e868bd32f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -31,7 +31,8 @@ MAP COMMANDS | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *VALUE* := { *BYTES* | *MAP* | *PROGRAM* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *VALUE* := { *BYTES* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } DESCRIPTION -- cgit v1.2.3 From 55f3538c4923e9dfca132e99ebec370e8094afda Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:16 -0800 Subject: tools: bpftool: add bash completion for `bpftool prog load` Add bash completion for bpftool command `prog load`. Completion for this command is easy, as it only takes existing file paths as arguments. Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/bash-completion/bpftool | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 0137866bb8f6..17d246418348 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -230,10 +230,14 @@ _bpftool() fi return 0 ;; + load) + _filedir + return 0 + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin show list' -- \ - "$cur" ) ) + COMPREPLY=( $( compgen -W 'dump help pin load \ + show list' -- "$cur" ) ) ;; esac ;; -- cgit v1.2.3 From a827a164b98be2acba6a2e7559643ac9a5745086 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:17 -0800 Subject: tools: bpftool: add bash completion for cgroup commands Add bash completion for "bpftool cgroup" command family. While at it, also fix the formatting of some keywords in the man page for cgroups. Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +- tools/bpf/bpftool/bash-completion/bpftool | 64 ++++++++++++++++++++-- 2 files changed, 62 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 2fe2a1bdbe3e..0e4e923235b6 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -26,8 +26,8 @@ MAP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* } -| *ATTACH_FLAGS* := { *multi* | *override* } +| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } +| *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 17d246418348..08719c54a614 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -52,16 +52,24 @@ _bpftool_once_attr() done } -# Takes a list of words in argument; adds them all to COMPREPLY if none of them -# is already present on the command line. Returns no value. -_bpftool_one_of_list() +# Takes a list of words as argument; if any of those words is present on the +# command line, return 0. Otherwise, return 1. +_bpftool_search_list() { local w idx for w in $*; do for (( idx=3; idx < ${#words[@]}-1; idx++ )); do - [[ $w == ${words[idx]} ]] && return 1 + [[ $w == ${words[idx]} ]] && return 0 done done + return 1 +} + +# Takes a list of words in argument; adds them all to COMPREPLY if none of them +# is already present on the command line. Returns no value. +_bpftool_one_of_list() +{ + _bpftool_search_list $* && return 1 COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) ) } @@ -351,6 +359,54 @@ _bpftool() ;; esac ;; + cgroup) + case $command in + show|list) + _filedir + return 0 + ;; + attach|detach) + local ATTACH_TYPES='ingress egress sock_create sock_ops \ + device' + local ATTACH_FLAGS='multi override' + local PROG_TYPE='id pinned tag' + case $prev in + $command) + _filedir + return 0 + ;; + ingress|egress|sock_create|sock_ops|device) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_prog_ids + return 0 + ;; + *) + if ! _bpftool_search_list "$ATTACH_TYPES"; then + COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ + "$cur" ) ) + elif [[ "$command" == "attach" ]]; then + # We have an attach type on the command line, + # but it is not the previous word, or + # "id|pinned|tag" (we already checked for + # that). This should only leave the case when + # we need attach flags for "attach" commamnd. + _bpftool_one_of_list "$ATTACH_FLAGS" + fi + return 0 + ;; + esac + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help attach detach \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool -- cgit v1.2.3 From df9d36d9e8adefe0975c7b5888967651b3db3af3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 6 Feb 2018 17:10:12 -0500 Subject: selftests/ftrace: Have reset_ftrace_filter handle modules If a function probe in set_ftrace_filter belongs to a module, it will contain the module name. Like: wmi_query_block [wmi]:traceoff:unlimited But writing: '!wmi_query_block [wmi]:traceoff' > set_ftrace_filter will cause an error. We still need to write: '!wmi_query_block:traceoff' > set_ftrace_filter Cc: Shuah Khan Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/functions | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index f2019b37370d..e7c4c7b752a2 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -37,17 +37,18 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter if [ "$tr" = "" ]; then continue fi + name=`echo $t | cut -d: -f1 | cut -d' ' -f1` if [ $tr = "enable_event" -o $tr = "disable_event" ]; then - tr=`echo $t | cut -d: -f1-4` + tr=`echo $t | cut -d: -f2-4` limit=`echo $t | cut -d: -f5` else - tr=`echo $t | cut -d: -f1-2` + tr=`echo $t | cut -d: -f2` limit=`echo $t | cut -d: -f3` fi if [ "$limit" != "unlimited" ]; then tr="$tr:$limit" fi - echo "!$tr" > set_ftrace_filter + echo "!$name:$tr" > set_ftrace_filter done } -- cgit v1.2.3 From 0787ce336075ac0fa2d356de4e3c2a2488e851a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 6 Feb 2018 17:15:02 -0500 Subject: selftests/ftrace: Have reset_ftrace_filter handle multiple instances If a probe is attached to a static function that is in multiple files with the same name, removing it by name will remove all instances: # grep jump_label_unlock set_ftrace_filter jump_label_unlock:traceoff:unlimited jump_label_unlock:traceoff:unlimited # echo '!jump_label_unlock:traceoff' >> set_ftrace_filter # grep jump_label_unlock set_ftrace_filter # But the loop in reset_ftrace_filter will try to remove multiple instances multiple times. If this happens the second time will error and cause the test to fail. At each iteration of the loop, check to see if the probe being removed still exists. Cc: Shuah Khan Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/functions | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index e7c4c7b752a2..df3dd7fe5f9b 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -37,6 +37,9 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter if [ "$tr" = "" ]; then continue fi + if ! grep -q "$t" set_ftrace_filter; then + continue; + fi name=`echo $t | cut -d: -f1 | cut -d' ' -f1` if [ $tr = "enable_event" -o $tr = "disable_event" ]; then tr=`echo $t | cut -d: -f2-4` -- cgit v1.2.3 From 97fe22adf33f06519bfdf7dad33bcd562e366c8f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 6 Feb 2018 17:19:03 -0500 Subject: selftests/ftrace: Add some missing glob checks Al Viro discovered a bug in the glob ftrace filtering code where "*a*b" is treated the same as "a*b", and functions that would be selected by "*a*b" but not "a*b" are not selected with "*a*b". Add tests for patterns "*a*b" and "a*b*" to the glob selftest. Link: http://lkml.kernel.org/r/20180127170748.GF13338@ZenIV.linux.org.uk Cc: Shuah Khan Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 589d52b211b7..27a54a17da65 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,6 +29,12 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$' # filter by *, end match ftrace_filter_check 'schedule*' '^schedule.*$' +# filter by *mid*end +ftrace_filter_check '*aw*lock' '.*aw.*lock$' + +# filter by start*mid* +ftrace_filter_check 'mutex*try*' '^mutex.*try.*' + # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. if grep -q 'accepts: .* glob-matching-pattern' README ; then -- cgit v1.2.3 From 878cb3fb06c67e8f1a452346e0bc6bb85f29b0a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 6 Feb 2018 17:28:36 -0500 Subject: selftests/ftrace: Add more tests for removing of function probes Al Viro discovered a bug in the removing of function probes where if it had a '*' at the beginning, it would fail to find any matches. That is, because it reset the glob search string to the the initial string with a "MATCH_END" type, instead of skipping the wildcard "*" it included it, where it would not match any functions because "*" was being treated as a normal character and not a wildcard one. Link: http://lkml.kernel.org/r/20180127031706.GE13338@ZenIV.linux.org.uk Cc: Shuah Khan Signed-off-by: Steven Rostedt (VMware) --- .../ftrace/test.d/ftrace/func_set_ftrace_file.tc | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 0f3f92622e33..68e7a48f5828 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -128,6 +128,43 @@ if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then fail "Expected $FUNC1 and $FUNC2" fi +test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter + cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual + DIFF=`diff $TMPDIR/actual $TMPDIR/expected` + test -z "$DIFF" +} + +# Set traceoff trigger for all fuctions with "lock" in their name +cat available_filter_functions | cut -d' ' -f1 | grep 'lock' | sort -u > $TMPDIR/expected +echo '*lock*:traceoff' > set_ftrace_filter +test_actual + +# now remove all with 'try' in it, and end with lock +grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!*try*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "m" and end with "lock" +grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!m*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "c" and have "unlock" +grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!c*unlock*:traceoff' >> set_ftrace_filter +test_actual + +# clear all the rest +> $TMPDIR/expected +echo '!*:traceoff' >> set_ftrace_filter +test_actual + +rm $TMPDIR/expected +rm $TMPDIR/actual + do_reset exit 0 -- cgit v1.2.3 From 8c88181ed452707f3815827225517b1964463b95 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:12 +0100 Subject: bpf: Sync kernel ABI header with tooling header for bpf_common.h I recently fixed up a lot of commits that forgot to keep the tooling headers in sync. And then I forgot to do the same thing in commit cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes"). Let correct that before people notice ;-). Lawrence did partly fix/sync this for bpf.h in commit d6d4f60c3a09 ("bpf: add selftest for tcpbpf"). Fixes: cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf_common.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h index 18be90725ab0..ee97668bdadb 100644 --- a/tools/include/uapi/linux/bpf_common.h +++ b/tools/include/uapi/linux/bpf_common.h @@ -15,9 +15,10 @@ /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 -- cgit v1.2.3 From 077c066a6c5445423147496e6c9b9a2c2d2ee762 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:17 +0100 Subject: tools/libbpf: improve the pr_debug statements to contain section numbers While debugging a bpf ELF loading issue, I needed to correlate the ELF section number with the failed relocation section reference. Thus, add section numbers/index to the pr_debug. In debug mode, also print section that were skipped. This helped me identify that a section (.eh_frame) was skipped, and this was the reason the relocation section (.rel.eh_frame) could not find that section number. The section numbers corresponds to the readelf tools Section Headers [Nr]. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c64840365433..0c794f7fc050 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -319,8 +319,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog under section %s\n", - section_name); + pr_warning("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); goto errout; } @@ -763,29 +763,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section header from %s\n", - obj->path); + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section name from %s\n", - obj->path); + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section data from %s(%s)\n", - name, obj->path); + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } - pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n", - name, (unsigned long)data->d_size, + pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + idx, name, (unsigned long)data->d_size, (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); @@ -840,6 +840,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else { + pr_debug("skip section(%d) %s\n", idx, name); } if (err) goto out; @@ -1119,8 +1121,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { - pr_warning("relocation failed: no %d section\n", - idx); + pr_warning("relocation failed: no section(%d)\n", idx); return -LIBBPF_ERRNO__RELOC; } -- cgit v1.2.3 From 864db336c677c288d81524c30a656804785902f9 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:22 +0100 Subject: selftests/bpf: add test program for loading BPF ELF files V2: Moved program into selftests/bpf from tools/libbpf This program can be used on its own for testing/debugging if a BPF ELF-object file can be loaded with libbpf (from tools/lib/bpf). If something is wrong with the ELF object, the program have a --debug mode that will display the ELF sections and especially the skipped sections. This allows for quickly identifying the problematic ELF section number, which can be corrolated with the readelf tool. The program signal error via return codes, and also have a --quiet mode, which is practical for use in scripts like selftests/bpf. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_libbpf_open.c | 150 +++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_libbpf_open.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 566d6adc172a..3f48da0866ba 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,7 +20,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - sample_map_ret0.o test_tcpbpf_kern.o + sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c new file mode 100644 index 000000000000..8fcd1c076add --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + */ +static const char *__doc__ = + "Libbpf test program for loading BPF ELF object files"; + +#include +#include +#include +#include +#include +#include + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"debug", no_argument, NULL, 'D' }, + {"quiet", no_argument, NULL, 'q' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + + printf("\nDOCUMENTATION:\n%s\n\n", __doc__); + printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#define DEFINE_PRINT_FN(name, enabled) \ +static int libbpf_##name(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + if (enabled) { \ + fprintf(stderr, "[" #name "] "); \ + ret = vfprintf(stderr, fmt, args); \ + } \ + va_end(args); \ + return ret; \ +} +DEFINE_PRINT_FN(warning, 1) +DEFINE_PRINT_FN(info, 1) +DEFINE_PRINT_FN(debug, 1) + +#define EXIT_FAIL_LIBBPF EXIT_FAILURE +#define EXIT_FAIL_OPTION 2 + +int test_walk_progs(struct bpf_object *obj, bool verbose) +{ + struct bpf_program *prog; + int cnt = 0; + + bpf_object__for_each_program(prog, obj) { + cnt++; + if (verbose) + printf("Prog (count:%d) section_name: %s\n", cnt, + bpf_program__title(prog, false)); + } + return 0; +} + +int test_walk_maps(struct bpf_object *obj, bool verbose) +{ + struct bpf_map *map; + int cnt = 0; + + bpf_map__for_each(map, obj) { + cnt++; + if (verbose) + printf("Map (count:%d) name: %s\n", cnt, + bpf_map__name(map)); + } + return 0; +} + +int test_open_file(char *filename, bool verbose) +{ + struct bpf_object *bpfobj = NULL; + long err; + + if (verbose) + printf("Open BPF ELF-file with libbpf: %s\n", filename); + + /* Load BPF ELF object file and check for errors */ + bpfobj = bpf_object__open(filename); + err = libbpf_get_error(bpfobj); + if (err) { + char err_buf[128]; + libbpf_strerror(err, err_buf, sizeof(err_buf)); + if (verbose) + printf("Unable to load eBPF objects in file '%s': %s\n", + filename, err_buf); + return EXIT_FAIL_LIBBPF; + } + test_walk_progs(bpfobj, verbose); + test_walk_maps(bpfobj, verbose); + + if (verbose) + printf("Close BPF ELF-file with libbpf: %s\n", + bpf_object__name(bpfobj)); + bpf_object__close(bpfobj); + + return 0; +} + +int main(int argc, char **argv) +{ + char filename[1024] = { 0 }; + bool verbose = 1; + int longindex = 0; + int opt; + + libbpf_set_print(libbpf_warning, libbpf_info, NULL); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hDq", + long_options, &longindex)) != -1) { + switch (opt) { + case 'D': + libbpf_set_print(libbpf_warning, libbpf_info, + libbpf_debug); + break; + case 'q': /* Use in scripting mode */ + verbose = 0; + break; + case 'h': + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + if (optind >= argc) { + usage(argv); + printf("ERROR: Expected BPF_FILE argument after options\n"); + return EXIT_FAIL_OPTION; + } + snprintf(filename, sizeof(filename), "%s", argv[optind]); + + return test_open_file(filename, verbose); +} -- cgit v1.2.3 From f09b2e382e9a7053e3ae6f2fb6535efd5760cf5d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:27 +0100 Subject: selftests/bpf: add selftest that use test_libbpf_open This script test_libbpf.sh will be part of the 'make run_tests' invocation, but can also be invoked manually in this directory, and a verbose mode can be enabled via setting the environment variable $VERBOSE like: $ VERBOSE=yes ./test_libbpf.sh The script contains some tests that are commented out, as they currently fail. They are reminders about what we need to improve for the libbpf loader library. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 14 +++++++-- tools/testing/selftests/bpf/test_libbpf.sh | 49 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/bpf/test_libbpf.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3f48da0866ba..5c43c187f27c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -13,6 +13,7 @@ endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -lrt -lpthread +# Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user @@ -20,17 +21,26 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open + sample_map_ret0.o test_tcpbpf_kern.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ +# Order correspond to 'make run_tests' order +TEST_PROGS := test_kmod.sh \ + test_libbpf.sh \ + test_xdp_redirect.sh \ + test_xdp_meta.sh \ test_offload.py +# Compile but not part of 'make run_tests' +TEST_GEN_PROGS_EXTENDED = test_libbpf_open + include ../lib.mk BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a + .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh new file mode 100755 index 000000000000..d97dc914cd49 --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +export TESTNAME=test_libbpf + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 + echo "selftests: $TESTNAME [FAILED]"; + fi +} + +libbpf_open_file() +{ + LAST_LOADED=$1 + if [ -n "$VERBOSE" ]; then + ./test_libbpf_open $1 + else + ./test_libbpf_open --quiet $1 + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +libbpf_open_file test_l4lb.o + +# TODO: fix libbpf to load noinline functions +# [warning] libbpf: incorrect bpf_call opcode +#libbpf_open_file test_l4lb_noinline.o + +# TODO: fix test_xdp_meta.c to load with libbpf +# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version +#libbpf_open_file test_xdp_meta.o + +# TODO: fix libbpf to handle .eh_frame +# [warning] libbpf: relocation failed: no section(10) +#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o + +# Success +exit 0 -- cgit v1.2.3 From e3d91b0ca523d53158f435a3e13df7f0cb360ea2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:32 +0100 Subject: tools/libbpf: handle issues with bpf ELF objects containing .eh_frames V3: More generic skipping of relo-section (suggested by Daniel) If clang >= 4.0.1 is missing the option '-target bpf', it will cause llc/llvm to create two ELF sections for "Exception Frames", with section names '.eh_frame' and '.rel.eh_frame'. The BPF ELF loader library libbpf fails when loading files with these sections. The other in-kernel BPF ELF loader in samples/bpf/bpf_load.c, handle this gracefully. And iproute2 loader also seems to work with these "eh" sections. The issue in libbpf is caused by bpf_object__elf_collect() skipping some sections, and later when performing relocation it will be pointing to a skipped section, as these sections cannot be found by bpf_object__find_prog_by_idx() in bpf_object__collect_reloc(). This is a general issue that also occurs for other sections, like debug sections which are also skipped and can have relo section. As suggested by Daniel. To avoid keeping state about all skipped sections, instead perform a direct qlookup in the ELF object. Lookup the section that the relo-section points to and check if it contains executable machine instructions (denoted by the sh_flags SHF_EXECINSTR). Use this check to also skip irrelevant relo-sections. Note, for samples/bpf/ the '-target bpf' parameter to clang cannot be used due to incompatibility with asm embedded headers, that some of the samples include. This is explained in more details by Yonghong Song in bpf_devel_QA. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0c794f7fc050..97073d649c1a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -742,6 +742,24 @@ bpf_object__init_maps(struct bpf_object *obj) return 0; } +static bool section_have_execinstr(struct bpf_object *obj, int idx) +{ + Elf_Scn *scn; + GElf_Shdr sh; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) + return false; + + if (gelf_getshdr(scn, &sh) != &sh) + return false; + + if (sh.sh_flags & SHF_EXECINSTR) + return true; + + return false; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -825,6 +843,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; int nr_reloc = obj->efile.nr_reloc + 1; + int sec = sh.sh_info; /* points to other section */ + + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, sec)) { + pr_debug("skip relo %s(%d) for section(%d)\n", + name, idx, sec); + continue; + } reloc = realloc(reloc, sizeof(*obj->efile.reloc) * nr_reloc); -- cgit v1.2.3 From 99ce7962d52d1948ad6f2785e308d48e76e0a6ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Feb 2018 14:02:32 +0100 Subject: objtool: Fix switch-table detection Linus reported that GCC-7.3 generated a switch-table construct that confused objtool. It turns out that, in particular due to KASAN, it is possible to have unrelated .rodata usage in between the .rodata setup for the switch-table and the following indirect jump. The simple linear reverse search from the indirect jump would hit upon the KASAN .rodata usage first and fail to find a switch_table, resulting in a spurious 'sibling call with modified stack frame' warning. Fix this by creating a 'jump-stack' which we can 'unwind' during reversal, thereby skipping over much of the in-between code. This is not fool proof by any means, but is sufficient to make the known cases work. Future work would be to construct more comprehensive flow analysis code. Reported-and-tested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180208130232.GF25235@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 41 +++++++++++++++++++++++++++++++++++++++-- tools/objtool/check.h | 1 + 2 files changed, 40 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9cd028aa1509..2e458eb45586 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -851,8 +851,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, * This is a fairly uncommon pattern which is new for GCC 6. As of this * writing, there are 11 occurrences of it in the allmodconfig kernel. * + * As of GCC 7 there are quite a few more of these and the 'in between' code + * is significant. Esp. with KASAN enabled some of the code between the mov + * and jmpq uses .rodata itself, which can confuse things. + * * TODO: Once we have DWARF CFI and smarter instruction decoding logic, * ensure the same register is used in the mov and jump instructions. + * + * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ static struct rela *find_switch_table(struct objtool_file *file, struct symbol *func, @@ -874,12 +880,25 @@ static struct rela *find_switch_table(struct objtool_file *file, text_rela->addend + 4); if (!rodata_rela) return NULL; + file->ignore_unreachables = true; return rodata_rela; } /* case 3 */ - func_for_each_insn_continue_reverse(file, func, insn) { + /* + * Backward search using the @first_jump_src links, these help avoid + * much of the 'in between' code. Which avoids us getting confused by + * it. + */ + for (insn = list_prev_entry(insn, list); + + &insn->list != &file->insn_list && + insn->sec == func->sec && + insn->offset >= func->offset; + + insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { + if (insn->type == INSN_JUMP_DYNAMIC) break; @@ -909,14 +928,32 @@ static struct rela *find_switch_table(struct objtool_file *file, return NULL; } + static int add_func_switch_tables(struct objtool_file *file, struct symbol *func) { - struct instruction *insn, *prev_jump = NULL; + struct instruction *insn, *last = NULL, *prev_jump = NULL; struct rela *rela, *prev_rela = NULL; int ret; func_for_each_insn(file, func, insn) { + if (!last) + last = insn; + + /* + * Store back-pointers for unconditional forward jumps such + * that find_switch_table() can back-track using those and + * avoid some potentially confusing code. + */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && + insn->offset > last->offset && + insn->jump_dest->offset > insn->offset && + !insn->jump_dest->first_jump_src) { + + insn->jump_dest->first_jump_src = insn; + last = insn->jump_dest; + } + if (insn->type != INSN_JUMP_DYNAMIC) continue; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index dbadb304a410..23a1d065cae1 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -47,6 +47,7 @@ struct instruction { bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; + struct instruction *first_jump_src; struct list_head alts; struct symbol *func; struct stack_op stack_op; -- cgit v1.2.3 From 941ff6f11c020913f5cddf543a9ec63475d7c082 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Feb 2018 14:49:44 +0100 Subject: bpf: fix rlimit in reuseport net selftest Fix two issues in the reuseport_bpf selftests that were reported by Linaro CI: [...] + ./reuseport_bpf ---- IPv4 UDP ---- Testing EBPF mod 10... Reprograming, testing mod 5... ./reuseport_bpf: ebpf error. log: 0: (bf) r6 = r1 1: (20) r0 = *(u32 *)skb[0] 2: (97) r0 %= 10 3: (95) exit processed 4 insns : Operation not permitted + echo FAIL [...] ---- IPv4 TCP ---- Testing EBPF mod 10... ./reuseport_bpf: failed to bind send socket: Address already in use + echo FAIL [...] For the former adjust rlimit since this was the cause of failure for loading the BPF prog, and for the latter add SO_REUSEADDR. Reported-by: Naresh Kamboju Link: https://bugs.linaro.org/show_bug.cgi?id=3502 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_bpf.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 4a8217448f20..cad14cd0ea92 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #ifndef ARRAY_SIZE @@ -190,11 +191,14 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport); struct sockaddr * const daddr = new_loopback_sockaddr(p.send_family, p.recv_port); - const int fd = socket(p.send_family, p.protocol, 0); + const int fd = socket(p.send_family, p.protocol, 0), one = 1; if (fd < 0) error(1, errno, "failed to create send socket"); + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) + error(1, errno, "failed to set reuseaddr"); + if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); @@ -433,6 +437,21 @@ void enable_fastopen(void) } } +static struct rlimit rlim_old, rlim_new; + +static __attribute__((constructor)) void main_ctor(void) +{ + getrlimit(RLIMIT_MEMLOCK, &rlim_old); + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +static __attribute__((destructor)) void main_dtor(void) +{ + setrlimit(RLIMIT_MEMLOCK, &rlim_old); +} + int main(void) { fprintf(stderr, "---- IPv4 UDP ----\n"); -- cgit v1.2.3 From 198ee8e17502da2634f7366395db1d77630e0219 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:10 +0100 Subject: selftests/x86: Fix vDSO selftest segfault for vsyscall=none The vDSO selftest tries to execute a vsyscall unconditionally, even if it is not present on the test system (e.g. if booted with vsyscall=none or with CONFIG_LEGACY_VSYSCALL_NONE=y set. Fix this by copying (and tweaking) the vsyscall check from test_vsyscall.c Signed-off-by: Dominik Brodowski Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/test_vdso.c | 50 ++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 29973cde06d3..558c8207e7b9 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -28,18 +28,52 @@ int nerrs = 0; +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); + +getcpu_t vgetcpu; +getcpu_t vdso_getcpu; + +static void *vsyscall_getcpu(void) +{ #ifdef __x86_64__ -# define VSYS(x) (x) + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ + return NULL; + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + /* assume entries are OK, as we test vDSO here not vsyscall */ + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("Warning: failed to find vsyscall getcpu\n"); + return NULL; + } + return (void *) (0xffffffffff600800); #else -# define VSYS(x) 0 + return NULL; #endif +} -typedef long (*getcpu_t)(unsigned *, unsigned *, void *); - -const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); -getcpu_t vdso_getcpu; -void fill_function_pointers() +static void fill_function_pointers() { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); @@ -54,6 +88,8 @@ void fill_function_pointers() vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); + + vgetcpu = (getcpu_t) vsyscall_getcpu(); } static long sys_getcpu(unsigned * cpu, unsigned * node, -- cgit v1.2.3 From d8e92de8ef952bed88c56c7a44c02d8dcae0984e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 21:59:24 +0100 Subject: selftests/x86: Clean up and document sscanf() usage Replace a couple of magically connected buffer length literal constants with a common definition that makes their relationship obvious. Also document why our sscanf() usage is safe. No intended functional changes. Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211205924.GA23210@light.dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/test_vdso.c | 11 ++++++++--- tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 558c8207e7b9..235259011704 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -26,6 +26,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + int nerrs = 0; typedef long (*getcpu_t)(unsigned *, unsigned *, void *); @@ -37,17 +40,19 @@ static void *vsyscall_getcpu(void) { #ifdef __x86_64__ FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ return NULL; - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 7a744fa7b786..be81621446f0 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -33,6 +33,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -98,7 +101,7 @@ static int init_vsys(void) #ifdef __x86_64__ int nerrs = 0; FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); @@ -108,10 +111,12 @@ static int init_vsys(void) return 0; } - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; -- cgit v1.2.3 From ce676638fe7b284132a7d7d5e7e7ad81bab9947e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 08:26:17 +0100 Subject: selftests/x86/pkeys: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also gets rid of two build warnings: protection_keys.c: In function ‘dumpit’: protection_keys.c:419:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] write(1, buf, nr_read); ^~~~~~~~~~~~~~~~~~~~~~ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Dave Hansen Cc: Shuah Khan Cc: Andy Lutomirski Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/protection_keys.c | 28 --------------------------- 1 file changed, 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index bc1b0735bb50..f15aa5a76fe3 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -393,34 +393,6 @@ pid_t fork_lazy_child(void) return forkret; } -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 -- cgit v1.2.3 From 7f95122067ab26fb8344b2a9de64ffbd0fea0010 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:09 +0100 Subject: selftests/x86: Fix build bug caused by the 5lvl test which has been moved to the VM directory Signed-off-by: Dominik Brodowski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Fixes: 235266b8e11c "selftests/vm: move 128TB mmap boundary test to generic directory" Link: http://lkml.kernel.org/r/20180211111013.16888-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d4f10ac2af2..91fbfa8fdc15 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) -- cgit v1.2.3 From 2cbc0d66de0480449c75636f55697c7ff3af61fc Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:11 +0100 Subject: selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). Without this patch, the move test may succeed, but the "int $0x80" causes a segfault, resulting in a false negative output of this self-test. Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/test_mremap_vdso.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index bf0d687c7db7..64f11c8d9b76 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp) vdso_size += PAGE_SIZE; } +#ifdef __i386__ /* Glibc is likely to explode now - exit with raw syscall */ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); +#else /* __x86_64__ */ + syscall(SYS_exit, ret); +#endif } else { int status; -- cgit v1.2.3 From ecdf06e1ea5376bba03c155751f6869d3dfaa210 Mon Sep 17 00:00:00 2001 From: Harish Date: Tue, 13 Feb 2018 12:02:55 +0530 Subject: selftests/powerpc: Fix to use ucontext_t instead of struct ucontext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With glibc 2.26 'struct ucontext' is removed to improve POSIX compliance, which breaks powerpc/alignment_handler selftest. Fix the test by using ucontext_t. Tested on ppc, works with older glibc versions as well. Fixes the following: alignment_handler.c: In function ‘sighandler’: alignment_handler.c:68:5: error: dereferencing pointer to incomplete type ‘struct ucontext’ ucp->uc_mcontext.gp_regs[PT_NIP] += 4; Signed-off-by: Harish Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/alignment/alignment_handler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 39fd362415cf..0f2698f9fd6d 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -57,7 +57,7 @@ volatile int gotsig; void sighandler(int sig, siginfo_t *info, void *ctx) { - struct ucontext *ucp = ctx; + ucontext_t *ucp = ctx; if (!testing) { signal(sig, SIG_DFL); -- cgit v1.2.3 From 4105c69703cdeba76f384b901712c9397b04e9c2 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Feb 2018 09:13:21 +0100 Subject: selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80" test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build this test only if we can also build 32-bit binaries (which should be a good approximation for that). Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 ++ tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 91fbfa8fdc15..73b8ef665c98 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -30,11 +30,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +EXTRA_CFLAGS += -DCAN_BUILD_32 endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +EXTRA_CFLAGS += -DCAN_BUILD_64 endif all_32: $(BINARIES_32) diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index a48da95c18fd..ddfdd635de16 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -119,7 +119,9 @@ static void check_result(void) int main() { +#ifdef CAN_BUILD_32 int tmp; +#endif sethandler(SIGTRAP, sigtrap, 0); @@ -139,12 +141,13 @@ int main() : : "c" (post_nop) : "r11"); check_result(); #endif - +#ifdef CAN_BUILD_32 printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) : INT80_CLOBBERS); check_result(); +#endif /* * This test is particularly interesting if fast syscalls use -- cgit v1.2.3 From 9279ddf23ce78ff2676e8e8e19fec0f022c26d04 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Feb 2018 09:15:19 +0100 Subject: selftests/x86: Disable tests requiring 32-bit support on pure 64-bit systems The ldt_gdt and ptrace_syscall selftests, even in their 64-bit variant, use hard-coded 32-bit syscall numbers and call "int $0x80". This will fail on 64-bit systems with CONFIG_IA32_EMULATION=y disabled. Therefore, do not build these tests if we cannot build 32-bit binaries (which should be a good approximation for CONFIG_IA32_EMULATION=y being enabled). Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 73b8ef665c98..aa6e2d7f6a1f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,16 +5,26 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ + check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ protection_keys test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +# Some selftests require 32bit support enabled also on 64bit systems +TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall -TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11) +TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED) +endif + BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) @@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie -UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) - ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) -- cgit v1.2.3 From fe24e27128252c230a34a6c628da2bf1676781ea Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 8 Feb 2018 17:09:25 -0600 Subject: objtool: Fix segfault in ignore_unreachable_insn() Peter Zijlstra's patch for converting WARN() to use UD2 triggered a bunch of false "unreachable instruction" warnings, which then triggered a seg fault in ignore_unreachable_insn(). The seg fault happened when it tried to dereference a NULL 'insn->func' pointer. Thanks to static_cpu_has(), some functions can jump to a non-function area in the .altinstr_aux section. That breaks ignore_unreachable_insn()'s assumption that it's always inside the original function. Make sure ignore_unreachable_insn() only follows jumps within the current function. Reported-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild test robot Link: http://lkml.kernel.org/r/bace77a60d5af9b45eddb8f8fb9c776c8de657ef.1518130694.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2e458eb45586..c7fb5c2392ee 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1935,13 +1935,19 @@ static bool ignore_unreachable_insn(struct instruction *insn) if (is_kasan_insn(insn) || is_ubsan_insn(insn)) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { - insn = insn->jump_dest; - continue; + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + if (insn->jump_dest && + insn->jump_dest->func == insn->func) { + insn = insn->jump_dest; + continue; + } + + break; } if (insn->offset + insn->len >= insn->func->offset + insn->func->len) break; + insn = list_next_entry(insn, list); } -- cgit v1.2.3 From 961888b1d76d84efc66a8f5604b06ac12ac2f978 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Mon, 18 Dec 2017 16:34:10 +0800 Subject: selftests/x86/mpx: Fix incorrect bounds with old _sigfault For distributions with old userspace header files, the _sigfault structure is different. mpx-mini-test fails with the following error: [root@Purley]# mpx-mini-test_64 tabletest XSAVE is supported by HW & OS XSAVE processor supported state mask: 0x2ff XSAVE OS supported state mask: 0x2ff BNDREGS: size: 64 user: 1 supervisor: 0 aligned: 0 BNDCSR: size: 64 user: 1 supervisor: 0 aligned: 0 starting mpx bounds table test ERROR: siginfo bounds do not match shadow bounds for register 0 Fix it by using the correct offset of _lower/_upper in _sigfault. RHEL needs this patch to work. Signed-off-by: Rui Wang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@linux.intel.com Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test") Link: http://lkml.kernel.org/r/1513586050-1641-1-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/mpx-mini-test.c | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index ec0f6b45ce8b..9c0325e1ea68 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si) return si->si_upper; } #else + +/* + * This deals with old version of _sigfault in some distros: + * + +old _sigfault: + struct { + void *si_addr; + } _sigfault; + +new _sigfault: + struct { + void __user *_addr; + int _trapno; + short _addr_lsb; + union { + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + __u32 _pkey; + }; + } _sigfault; + * + */ + static inline void **__si_bounds_hack(siginfo_t *si) { void *sigfault = &si->_sifields._sigfault; void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - void **__si_lower = end_sigfault; + int *trapno = (int*)end_sigfault; + /* skip _trapno and _addr_lsb */ + void **__si_lower = (void**)(trapno + 2); return __si_lower; } @@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si) static inline void *__si_bounds_upper(siginfo_t *si) { - return (*__si_bounds_hack(si)) + sizeof(void *); + return *(__si_bounds_hack(si) + 1); } #endif -- cgit v1.2.3 From 0b7c1528fb741803396da68a9d8d285ff7db731c Mon Sep 17 00:00:00 2001 From: William Cohen Date: Tue, 30 Jan 2018 22:28:13 -0500 Subject: perf vendor events aarch64: Add JSON metrics for ARM Cortex-A53 Processor Add JSON metrics for ARM Cortex-A53 Processor. Unlike the Intel processors there isn't a script that automatically generated these files. The patch was manually generated from the documentation and the previous oprofile ARM Cortex ac53 event file patch I made. The relevant documentation is in the "12.9 Events" section of the ARM Cortex A53 MPCore Processor Revision: r0p4 Technical Reference Manual. The ARM Cortex A53 manual is available at: http://infocenter.arm.com/help/topic/com.arm.doc.ddi0500g/DDI0500G_cortex_a53_trm.pdf Use that to look for additional information about the events. Signed-off-by: William Cohen Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180131032813.9564-1-wcohen@redhat.com [ Added references provided by William Cohen ] Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/arm64/cortex-a53/branch.json | 27 +++++++++++ .../perf/pmu-events/arch/arm64/cortex-a53/bus.json | 22 +++++++++ .../pmu-events/arch/arm64/cortex-a53/cache.json | 27 +++++++++++ .../pmu-events/arch/arm64/cortex-a53/memory.json | 22 +++++++++ .../pmu-events/arch/arm64/cortex-a53/other.json | 32 +++++++++++++ .../pmu-events/arch/arm64/cortex-a53/pipeline.json | 52 ++++++++++++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 7 files changed, 183 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/other.json create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json new file mode 100644 index 000000000000..3b6208763e50 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json @@ -0,0 +1,27 @@ +[ + {, + "EventCode": "0x7A", + "EventName": "BR_INDIRECT_SPEC", + "BriefDescription": "Branch speculatively executed - Indirect branch" + }, + {, + "EventCode": "0xC9", + "EventName": "BR_COND", + "BriefDescription": "Conditional branch executed" + }, + {, + "EventCode": "0xCA", + "EventName": "BR_INDIRECT_MISPRED", + "BriefDescription": "Indirect branch mispredicted" + }, + {, + "EventCode": "0xCB", + "EventName": "BR_INDIRECT_MISPRED_ADDR", + "BriefDescription": "Indirect branch mispredicted because of address miscompare" + }, + {, + "EventCode": "0xCC", + "EventName": "BR_COND_MISPRED", + "BriefDescription": "Conditional branch mispredicted" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json new file mode 100644 index 000000000000..480d9f7460ab --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json @@ -0,0 +1,22 @@ +[ + {, + "EventCode": "0x60", + "EventName": "BUS_ACCESS_LD", + "BriefDescription": "Bus access - Read" + }, + {, + "EventCode": "0x61", + "EventName": "BUS_ACCESS_ST", + "BriefDescription": "Bus access - Write" + }, + {, + "EventCode": "0xC0", + "EventName": "EXT_MEM_REQ", + "BriefDescription": "External memory request" + }, + {, + "EventCode": "0xC1", + "EventName": "EXT_MEM_REQ_NC", + "BriefDescription": "Non-cacheable external memory request" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json new file mode 100644 index 000000000000..11baad6344b9 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json @@ -0,0 +1,27 @@ +[ + {, + "EventCode": "0xC2", + "EventName": "PREFETCH_LINEFILL", + "BriefDescription": "Linefill because of prefetch" + }, + {, + "EventCode": "0xC3", + "EventName": "PREFETCH_LINEFILL_DROP", + "BriefDescription": "Instruction Cache Throttle occurred" + }, + {, + "EventCode": "0xC4", + "EventName": "READ_ALLOC_ENTER", + "BriefDescription": "Entering read allocate mode" + }, + {, + "EventCode": "0xC5", + "EventName": "READ_ALLOC", + "BriefDescription": "Read allocate mode" + }, + {, + "EventCode": "0xC8", + "EventName": "EXT_SNOOP", + "BriefDescription": "SCU Snooped data from another CPU for this CPU" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json new file mode 100644 index 000000000000..480d9f7460ab --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json @@ -0,0 +1,22 @@ +[ + {, + "EventCode": "0x60", + "EventName": "BUS_ACCESS_LD", + "BriefDescription": "Bus access - Read" + }, + {, + "EventCode": "0x61", + "EventName": "BUS_ACCESS_ST", + "BriefDescription": "Bus access - Write" + }, + {, + "EventCode": "0xC0", + "EventName": "EXT_MEM_REQ", + "BriefDescription": "External memory request" + }, + {, + "EventCode": "0xC1", + "EventName": "EXT_MEM_REQ_NC", + "BriefDescription": "Non-cacheable external memory request" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json new file mode 100644 index 000000000000..73a22402d003 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json @@ -0,0 +1,32 @@ +[ + {, + "EventCode": "0x86", + "EventName": "EXC_IRQ", + "BriefDescription": "Exception taken, IRQ" + }, + {, + "EventCode": "0x87", + "EventName": "EXC_FIQ", + "BriefDescription": "Exception taken, FIQ" + }, + {, + "EventCode": "0xC6", + "EventName": "PRE_DECODE_ERR", + "BriefDescription": "Pre-decode error" + }, + {, + "EventCode": "0xD0", + "EventName": "L1I_CACHE_ERR", + "BriefDescription": "L1 Instruction Cache (data or tag) memory error" + }, + {, + "EventCode": "0xD1", + "EventName": "L1D_CACHE_ERR", + "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" + }, + {, + "EventCode": "0xD2", + "EventName": "TLB_ERR", + "BriefDescription": "TLB memory error" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json new file mode 100644 index 000000000000..3149fb90555a --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json @@ -0,0 +1,52 @@ +[ + {, + "EventCode": "0xC7", + "EventName": "STALL_SB_FULL", + "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full" + }, + {, + "EventCode": "0xE0", + "EventName": "OTHER_IQ_DEP_STALL", + "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error" + }, + {, + "EventCode": "0xE1", + "EventName": "IC_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed" + }, + {, + "EventCode": "0xE2", + "EventName": "IUTLB_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed" + }, + {, + "EventCode": "0xE3", + "EventName": "DECODE_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed" + }, + {, + "EventCode": "0xE4", + "EventName": "OTHER_INTERLOCK_STALL", + "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction" + }, + {, + "EventCode": "0xE5", + "EventName": "AGU_DEP_STALL", + "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU" + }, + {, + "EventCode": "0xE6", + "EventName": "SIMD_DEP_STALL", + "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation." + }, + {, + "EventCode": "0xE7", + "EventName": "LD_DEP_STALL", + "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss" + }, + {, + "EventCode": "0xE8", + "EventName": "ST_DEP_STALL", + "BriefDescription": "Cycles there is a stall in the Wr stage because of a store" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 219d6756134e..e61c9ca6cf9e 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -13,3 +13,4 @@ # #Family-model,Version,Filename,EventType 0x00000000420f5160,v1,cavium,core +0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core -- cgit v1.2.3 From 6888ff66c44ffa3077ed69e978902d0ff4b84ae1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:16 -0800 Subject: perf evlist: Remove stale mmap read for backward perf_evlist__mmap_read_catchup() and perf_evlist__mmap_read_backward() are only for overwrite mode. But they read the evlist->mmap buffer which is for non-overwrite mode. It did not bring any serious problem yet, because there is no one use it. Remove the unused interfaces. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Wang Nan Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1516310792-208685-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 17 ----------------- tools/perf/util/evlist.h | 4 ---- 2 files changed, 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ac35cd214feb..e5fc14e53c05 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int return perf_mmap__read_forward(md); } -union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) -{ - struct perf_mmap *md = &evlist->mmap[idx]; - - /* - * No need to check messup for backward ring buffer: - * We can always read arbitrary long data from a backward - * ring buffer unless we forget to pause it before reading. - */ - return perf_mmap__read_backward(md); -} - union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { return perf_evlist__mmap_read_forward(evlist, idx); } -void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) -{ - perf_mmap__read_catchup(&evlist->mmap[idx]); -} - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { perf_mmap__consume(&evlist->mmap[idx], false); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 75f8e0ad5d76..336b838e6957 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx); -union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, - int idx); -void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); -- cgit v1.2.3 From dc6c35c679e96987dc83a003f30bc2cc33c84c00 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:17 -0800 Subject: perf mmap: Recalculate size for overwrite mode In perf_mmap__push(), the 'size' need to be recalculated, otherwise the invalid data might be pushed to the record in overwrite mode. The issue is introduced by commit 7fb4b407a124 ("perf mmap: Don't discard prev in backward mode"). When the ring buffer is full in overwrite mode, backward_rb_find_range() will be called to recalculate the 'start' and 'end'. The 'size' needs to be recalculated accordingly. Unconditionally recalculate the 'size', not just for full ring buffer in overwrite mode. Because: - There is no harmful to recalculate the 'size' for other cases. - The code of calculating 'start' and 'end' will be factored out later. The new function does not need to return 'size'. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Fixes: 7fb4b407a124 ("perf mmap: Don't discard prev in backward mode") Link: http://lkml.kernel.org/r/1516310792-208685-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 05076e683938..97cf4fab564b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -302,6 +302,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, return -1; } + size = end - start; + if ((start & md->mask) + size != (end & md->mask)) { buf = &data[start & md->mask]; size = md->mask + 1 - (start & md->mask); -- cgit v1.2.3 From f92c8cbe597a5a2ccec702dff824f3fe0f3623eb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:18 -0800 Subject: perf mmap: Cleanup perf_mmap__push() The first assignment for 'start' and 'end' is redundant. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-4-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 97cf4fab564b..fbbbe87f0308 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -272,7 +272,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, { u64 head = perf_mmap__read_head(md); u64 old = md->prev; - u64 end = head, start = old; + u64 end, start; unsigned char *data = md->base + page_size; unsigned long size; void *buf; -- cgit v1.2.3 From 8872481bd04850b19e053dc579de5a11b83b16fc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:19 -0800 Subject: perf mmap: Introduce perf_mmap__read_init() The new function perf_mmap__read_init() is factored out from perf_mmap__push(). It is to calculate the 'start' and 'end' of the available data in ringbuffer. No functional change. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-5-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 37 +++++++++++++++++++++++++++---------- tools/perf/util/mmap.h | 2 ++ 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index fbbbe87f0308..c19a4e640e8e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -267,24 +267,24 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6 return -1; } -int perf_mmap__push(struct perf_mmap *md, bool overwrite, - void *to, int push(void *to, void *buf, size_t size)) +/* + * Report the start and end of the available data in ringbuffer + */ +int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, + u64 *startp, u64 *endp) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; - u64 end, start; unsigned char *data = md->base + page_size; unsigned long size; - void *buf; - int rc = 0; - start = overwrite ? head : old; - end = overwrite ? old : head; + *startp = overwrite ? head : old; + *endp = overwrite ? old : head; - if (start == end) + if (*startp == *endp) return 0; - size = end - start; + size = *endp - *startp; if (size > (unsigned long)(md->mask) + 1) { if (!overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); @@ -298,10 +298,27 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) + if (overwrite_rb_find_range(data, md->mask, head, startp, endp)) return -1; } + return 1; +} + +int perf_mmap__push(struct perf_mmap *md, bool overwrite, + void *to, int push(void *to, void *buf, size_t size)) +{ + u64 head = perf_mmap__read_head(md); + u64 end, start; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int rc = 0; + + rc = perf_mmap__read_init(md, overwrite, &start, &end); + if (rc < 1) + return rc; + size = end - start; if ((start & md->mask) + size != (end & md->mask)) { diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e43d7b55a55f..9ab2b48df65b 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -94,4 +94,6 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, size_t perf_mmap__mmap_len(struct perf_mmap *map); +int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, + u64 *startp, u64 *endp); #endif /*__PERF_MMAP_H */ -- cgit v1.2.3 From 189f2cc91f9f2efef5d5f4dde43684c01b5f6f2f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:20 -0800 Subject: perf mmap: Add new return value logic for perf_mmap__read_init() Improve the readability by using meaningful enum (-EAGAIN, -EINVAL and 0) to replace the three returning states (0, -1 and 1). Suggested-by: Wang Nan Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-6-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index c19a4e640e8e..38fa69dc635e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -282,7 +282,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, *endp = overwrite ? old : head; if (*startp == *endp) - return 0; + return -EAGAIN; size = *endp - *startp; if (size > (unsigned long)(md->mask) + 1) { @@ -291,7 +291,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, md->prev = head; perf_mmap__consume(md, overwrite); - return 0; + return -EAGAIN; } /* @@ -299,10 +299,10 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, * most of data from it. */ if (overwrite_rb_find_range(data, md->mask, head, startp, endp)) - return -1; + return -EINVAL; } - return 1; + return 0; } int perf_mmap__push(struct perf_mmap *md, bool overwrite, @@ -316,8 +316,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, int rc = 0; rc = perf_mmap__read_init(md, overwrite, &start, &end); - if (rc < 1) - return rc; + if (rc < 0) + return (rc == -EAGAIN) ? 0 : -1; size = end - start; -- cgit v1.2.3 From b4b036b4c76341a5034e872aca3727c4988a7304 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:21 -0800 Subject: perf mmap: Discard 'prev' in perf_mmap__read() The 'start' and 'prev' variables are duplicates in perf_mmap__read(). Use 'map->prev' to replace 'start' in perf_mmap__read_*(). Suggested-by: Wang Nan Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-7-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 38fa69dc635e..125bfda9d037 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) /* When check_messup is true, 'end' must points to a good entry */ static union perf_event *perf_mmap__read(struct perf_mmap *map, - u64 start, u64 end, u64 *prev) + u64 *startp, u64 end) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; - int diff = end - start; + int diff = end - *startp; if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[start & map->mask]; + event = (union perf_event *)&data[*startp & map->mask]; size = event->header.size; - if (size < sizeof(event->header) || diff < (int)size) { - event = NULL; - goto broken_event; - } + if (size < sizeof(event->header) || diff < (int)size) + return NULL; /* * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((start & map->mask) + size != ((start + size) & map->mask)) { - unsigned int offset = start; + if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + unsigned int offset = *startp; unsigned int len = min(sizeof(*event), size), cpy; void *dst = map->event_copy; @@ -59,20 +57,15 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, event = (union perf_event *)map->event_copy; } - start += size; + *startp += size; } -broken_event: - if (prev) - *prev = start; - return event; } union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; - u64 old = map->prev; /* * Check if event was unmapped due to a POLLHUP/POLLERR. @@ -82,13 +75,12 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) head = perf_mmap__read_head(map); - return perf_mmap__read(map, old, head, &map->prev); + return perf_mmap__read(map, &map->prev, head); } union perf_event *perf_mmap__read_backward(struct perf_mmap *map) { u64 head, end; - u64 start = map->prev; /* * Check if event was unmapped due to a POLLHUP/POLLERR. @@ -118,7 +110,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) else end = head + map->mask + 1; - return perf_mmap__read(map, start, end, &map->prev); + return perf_mmap__read(map, &map->prev, end); } void perf_mmap__read_catchup(struct perf_mmap *map) -- cgit v1.2.3 From ee023de05f35484691f7d9e5c1f92195ac4d64d2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:22 -0800 Subject: perf mmap: Introduce perf_mmap__read_done() The direction of overwrite mode is backward. The last perf_mmap__read() will set tail to map->prev. Need to correct the map->prev to head which is the end of next read. It will be used later. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-8-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 11 +++++++++++ tools/perf/util/mmap.h | 1 + 2 files changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 125bfda9d037..4f59eaefc706 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -338,3 +338,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, out: return rc; } + +/* + * Mandatory for overwrite mode + * The direction of overwrite mode is backward. + * The last perf_mmap__read() will set tail to map->prev. + * Need to correct the map->prev to head which is the end of next read. + */ +void perf_mmap__read_done(struct perf_mmap *map) +{ + map->prev = perf_mmap__read_head(map); +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 9ab2b48df65b..95549d4af943 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -96,4 +96,5 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, u64 *startp, u64 *endp); +void perf_mmap__read_done(struct perf_mmap *map); #endif /*__PERF_MMAP_H */ -- cgit v1.2.3 From 7bb45972952db9298fe5cc440160dcad1a66bfbc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:23 -0800 Subject: perf mmap: Introduce perf_mmap__read_event() Except for 'perf record', the other perf tools read events one by one from the ring buffer using perf_mmap__read_forward(). But it only supports non-overwrite mode. Introduce perf_mmap__read_event() to support both non-overwrite and overwrite mode. Usage: perf_mmap__read_init() while(event = perf_mmap__read_event()) { //process the event perf_mmap__consume() } perf_mmap__read_done() It cannot use perf_mmap__read_backward(). Because it always reads the stale buffer which is already processed. Furthermore, the forward and backward concepts have been removed. The perf_mmap__read_backward() will be replaced and discarded later. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-9-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 39 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/mmap.h | 4 ++++ 2 files changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 4f59eaefc706..f804926778b7 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -113,6 +113,45 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) return perf_mmap__read(map, &map->prev, end); } +/* + * Read event from ring buffer one by one. + * Return one event for each call. + * + * Usage: + * perf_mmap__read_init() + * while(event = perf_mmap__read_event()) { + * //process the event + * perf_mmap__consume() + * } + * perf_mmap__read_done() + */ +union perf_event *perf_mmap__read_event(struct perf_mmap *map, + bool overwrite, + u64 *startp, u64 end) +{ + union perf_event *event; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + if (startp == NULL) + return NULL; + + /* non-overwirte doesn't pause the ringbuffer */ + if (!overwrite) + end = perf_mmap__read_head(map); + + event = perf_mmap__read(map, startp, end); + + if (!overwrite) + map->prev = *startp; + + return event; +} + void perf_mmap__read_catchup(struct perf_mmap *map) { u64 head; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 95549d4af943..28718543dd42 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -89,6 +89,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); +union perf_event *perf_mmap__read_event(struct perf_mmap *map, + bool overwrite, + u64 *startp, u64 end); + int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); -- cgit v1.2.3 From 600a7cfe88de2c6e44e23d61dd721b996b790eb2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:24 -0800 Subject: perf test: Update mmap read functions for backward-ring-buffer test Use the new perf_mmap__read_* interfaces for overwrite ringbuffer test. Commiter notes: Testing: [root@seventh ~]# perf test -v backward 48: Read backward ring buffer : --- start --- test child forked, pid 8309 Using CPUID GenuineIntel-6-9E mmap size 1052672B mmap size 8192B Finished reading overwrite ring buffer: rewind test child finished with 0 ---- end ---- Read backward ring buffer: Ok [root@seventh ~]# Signed-off-by: Kan Liang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-10-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/backward-ring-buffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 4035d43523c3..e0b1b414d466 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, int i; for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; + u64 start, end; - perf_mmap__read_catchup(&evlist->overwrite_mmap[i]); - while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) { + perf_mmap__read_init(map, true, &start, &end); + while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) { const u32 type = event->header.type; switch (type) { @@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, return TEST_FAIL; } } + perf_mmap__read_done(map); } return TEST_OK; } -- cgit v1.2.3 From 3effc2f165a842d640873e29d4c5cc1650143aef Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:25 -0800 Subject: perf mmap: Discard legacy interface for mmap read Discards perf_mmap__read_backward() and perf_mmap__read_catchup(). No tools use them. There are tools still use perf_mmap__read_forward(). Keep it, but add comments to point to the new interface for future use. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-11-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 50 ++++---------------------------------------------- tools/perf/util/mmap.h | 3 --- 2 files changed, 4 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index f804926778b7..91531a7c8fbf 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -63,6 +63,10 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, return event; } +/* + * legacy interface for mmap read. + * Don't use it. Use perf_mmap__read_event(). + */ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; @@ -78,41 +82,6 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) return perf_mmap__read(map, &map->prev, head); } -union perf_event *perf_mmap__read_backward(struct perf_mmap *map) -{ - u64 head, end; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return NULL; - - head = perf_mmap__read_head(map); - if (!head) - return NULL; - - /* - * 'head' pointer starts from 0. Kernel minus sizeof(record) form - * it each time when kernel writes to it, so in fact 'head' is - * negative. 'end' pointer is made manually by adding the size of - * the ring buffer to 'head' pointer, means the validate data can - * read is the whole ring buffer. If 'end' is positive, the ring - * buffer has not fully filled, so we must adjust 'end' to 0. - * - * However, since both 'head' and 'end' is unsigned, we can't - * simply compare 'end' against 0. Here we compare '-head' and - * the size of the ring buffer, where -head is the number of bytes - * kernel write to the ring buffer. - */ - if (-head < (u64)(map->mask + 1)) - end = 0; - else - end = head + map->mask + 1; - - return perf_mmap__read(map, &map->prev, end); -} - /* * Read event from ring buffer one by one. * Return one event for each call. @@ -152,17 +121,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map, return event; } -void perf_mmap__read_catchup(struct perf_mmap *map) -{ - u64 head; - - if (!refcount_read(&map->refcnt)) - return; - - head = perf_mmap__read_head(map); - map->prev = head; -} - static bool perf_mmap__empty(struct perf_mmap *map) { return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 28718543dd42..ec7d3a24e276 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map); void perf_mmap__consume(struct perf_mmap *map, bool overwrite); -void perf_mmap__read_catchup(struct perf_mmap *md); - static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; @@ -87,7 +85,6 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); -union perf_event *perf_mmap__read_backward(struct perf_mmap *map); union perf_event *perf_mmap__read_event(struct perf_mmap *map, bool overwrite, -- cgit v1.2.3 From 63878a53cedc3df31bd4ba8740a49fa0fc116ac6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:26 -0800 Subject: perf top: Check per-event overwrite term Per-event overwrite term is not forbidden in 'perf top', which can bring problems. Because 'perf top' only support non-overwrite mode now. Add new rules and check regarding to overwrite term for 'perf top'. - All events either have same per-event term or don't have per-event mode setting. Otherwise, it will error out. - Per-event overwrite term should be consistent as opts->overwrite. If not, updating the opts->overwrite according to per-event term. Make it possible to support either non-overwrite or overwrite mode. The overwrite mode is forbidden now, which will be removed when the overwrite mode is supported later. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-12-git-send-email-kan.liang@intel.com [ Renamed perf_top_overwrite_check to perf_top__overwrite_check, to follow existing convention ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c6ccda52117d..17783798924a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -881,6 +881,68 @@ static void perf_top__mmap_read(struct perf_top *top) perf_top__mmap_read_idx(top, i); } +/* + * Check per-event overwrite term. + * perf top should support consistent term for all events. + * - All events don't have per-event term + * E.g. "cpu/cpu-cycles/,cpu/instructions/" + * Nothing change, return 0. + * - All events have same per-event term + * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/ + * Using the per-event setting to replace the opts->overwrite if + * they are different, then return 0. + * - Events have different per-event term + * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/" + * Return -1 + * - Some of the event set per-event term, but some not. + * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/" + * Return -1 + */ +static int perf_top__overwrite_check(struct perf_top *top) +{ + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; + struct perf_evsel_config_term *term; + struct list_head *config_terms; + struct perf_evsel *evsel; + int set, overwrite = -1; + + evlist__for_each_entry(evlist, evsel) { + set = -1; + config_terms = &evsel->config_terms; + list_for_each_entry(term, config_terms, list) { + if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE) + set = term->val.overwrite ? 1 : 0; + } + + /* no term for current and previous event (likely) */ + if ((overwrite < 0) && (set < 0)) + continue; + + /* has term for both current and previous event, compare */ + if ((overwrite >= 0) && (set >= 0) && (overwrite != set)) + return -1; + + /* no term for current event but has term for previous one */ + if ((overwrite >= 0) && (set < 0)) + return -1; + + /* has term for current event */ + if ((overwrite < 0) && (set >= 0)) { + /* if it's first event, set overwrite */ + if (evsel == perf_evlist__first(evlist)) + overwrite = set; + else + return -1; + } + } + + if ((overwrite >= 0) && (opts->overwrite != overwrite)) + opts->overwrite = overwrite; + + return 0; +} + static int perf_top__start_counters(struct perf_top *top) { char msg[BUFSIZ]; @@ -888,6 +950,17 @@ static int perf_top__start_counters(struct perf_top *top) struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; + if (perf_top__overwrite_check(top)) { + ui__error("perf top only support consistent per-event " + "overwrite setting for all events\n"); + goto out_err; + } + + if (opts->overwrite) { + ui__error("not support overwrite mode yet\n"); + goto out_err; + } + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, counter) { -- cgit v1.2.3 From 9a831b3a32c5daf5d7cc672334d51930f78e4ea3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Feb 2018 11:27:25 -0300 Subject: perf evsel: Expose the perf_missing_features struct As tools may need to adjust to missing features, as 'perf top' will, in the next csets, to cope with a missing 'write_backward' feature. Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-jelngl9q1ooaizvkcput9tic@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 +----------- tools/perf/util/evsel.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ff359c9ece2e..ef351688b797 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -41,17 +41,7 @@ #include "sane_ctype.h" -static struct { - bool sample_id_all; - bool exclude_guest; - bool mmap2; - bool cloexec; - bool clockid; - bool clockid_wrong; - bool lbr_flags; - bool write_backward; - bool group_read; -} perf_missing_features; +struct perf_missing_features perf_missing_features; static clockid_t clockid; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 846e41644525..a7487c6d1866 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -149,6 +149,20 @@ union u64_swap { u32 val32[2]; }; +struct perf_missing_features { + bool sample_id_all; + bool exclude_guest; + bool mmap2; + bool cloexec; + bool clockid; + bool clockid_wrong; + bool lbr_flags; + bool write_backward; + bool group_read; +}; + +extern struct perf_missing_features perf_missing_features; + struct cpu_map; struct target; struct thread_map; -- cgit v1.2.3 From 204721d7eabe6ee98aafce791ce3efdbc4715834 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:28 -0800 Subject: perf top: Add overwrite fall back Switch to non-overwrite mode if kernel doesnot support overwrite ringbuffer. It's only effect when overwrite mode is supported. No change to current behavior. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-14-git-send-email-kan.liang@intel.com [ Use perf_missing_features.write_backward instead of the non merged is_write_backward_fail() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 17783798924a..ee4bba1e282c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -943,6 +943,27 @@ static int perf_top__overwrite_check(struct perf_top *top) return 0; } +static int perf_top_overwrite_fallback(struct perf_top *top, + struct perf_evsel *evsel) +{ + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; + struct perf_evsel *counter; + + if (!opts->overwrite) + return 0; + + /* only fall back when first event fails */ + if (evsel != perf_evlist__first(evlist)) + return 0; + + evlist__for_each_entry(evlist, counter) + counter->attr.write_backward = false; + opts->overwrite = false; + ui__warning("fall back to non-overwrite mode\n"); + return 1; +} + static int perf_top__start_counters(struct perf_top *top) { char msg[BUFSIZ]; @@ -967,6 +988,21 @@ static int perf_top__start_counters(struct perf_top *top) try_again: if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads) < 0) { + + /* + * Specially handle overwrite fall back. + * Because perf top is the only tool which has + * overwrite mode by default, support + * both overwrite and non-overwrite mode, and + * require consistent mode for all events. + * + * May move it to generic code with more tools + * have similar attribute. + */ + if (perf_missing_features.write_backward && + perf_top_overwrite_fallback(top, counter)) + goto try_again; + if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); -- cgit v1.2.3 From 06cc1a470ab237b991901729b125404c164f3660 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:29 -0800 Subject: perf hists browser: Add parameter to disable lost event warning For overwrite mode, the ringbuffer will be paused. The event lost is expected. It needs a way to notify the browser not print the warning. It will be used later for perf top to disable lost event warning in overwrite mode. There is no behavior change for now. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-15-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 4 ++-- tools/perf/builtin-report.c | 3 ++- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/hists.c | 38 +++++++++++++++++++++++++------------- tools/perf/ui/browsers/hists.h | 3 ++- tools/perf/util/hist.h | 6 ++++-- 6 files changed, 36 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c0815a37fdb5..539c3d460158 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help"); + key = hist_browser__run(browser, "? - help", true); switch (key) { case 's': @@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help"); + key = hist_browser__run(browser, "? - help", true); switch (key) { case 'q': diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 42a52dcc41cd..4ad5dc649716 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep) case 1: ret = perf_evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env); + &session->header.env, + true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ee4bba1e282c..7def861a9ec4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -611,7 +611,7 @@ static void *display_thread_tui(void *arg) perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env); + &top->session->header.env, true); done = 1; return NULL; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 68146f4620a5..6495ee55d9c3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si return browser->title ? browser->title(browser, bf, size) : 0; } -int hist_browser__run(struct hist_browser *browser, const char *help) +int hist_browser__run(struct hist_browser *browser, const char *help, + bool warn_lost_event) { int key; char title[160]; @@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help) nr_entries = hist_browser__nr_entries(browser); ui_browser__update_nr_entries(&browser->b, nr_entries); - if (browser->hists->stats.nr_lost_warned != - browser->hists->stats.nr_events[PERF_RECORD_LOST]) { + if (warn_lost_event && + (browser->hists->stats.nr_lost_warned != + browser->hists->stats.nr_events[PERF_RECORD_LOST])) { browser->hists->stats.nr_lost_warned = browser->hists->stats.nr_events[PERF_RECORD_LOST]; ui_browser__warn_lost_events(&browser->b); @@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); @@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, nr_options = 0; - key = hist_browser__run(browser, helpline); + key = hist_browser__run(browser, helpline, + warn_lost_event); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser, static int perf_evsel_menu__run(struct perf_evsel_menu *menu, int nr_events, const char *help, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + bool warn_lost_event) { struct perf_evlist *evlist = menu->b.priv; struct perf_evsel *pos; @@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, case K_TIMER: hbt->timer(hbt->arg); - if (!menu->lost_events_warned && menu->lost_events) { + if (!menu->lost_events_warned && + menu->lost_events && + warn_lost_event) { ui_browser__warn_lost_events(&menu->b); menu->lost_events_warned = true; } @@ -3224,7 +3231,8 @@ browse_hists: key = perf_evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, - menu->env); + menu->env, + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, menu.b.width = line_len; } - return perf_evsel_menu__run(&menu, nr_entries, help, hbt); + return perf_evsel_menu__run(&menu, nr_entries, help, + hbt, warn_lost_event); } int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { int nr_entries = evlist->nr_entries; @@ -3325,7 +3336,7 @@ single_entry: return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env); + env, warn_lost_event); } if (symbol_conf.event_group) { @@ -3342,5 +3353,6 @@ single_entry: } return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, - hbt, min_pcnt, env); + hbt, min_pcnt, env, + warn_lost_event); } diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index ba431777f559..9428bee076f2 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -28,7 +28,8 @@ struct hist_browser { struct hist_browser *hist_browser__new(struct hists *hists); void hist_browser__delete(struct hist_browser *browser); -int hist_browser__run(struct hist_browser *browser, const char *help); +int hist_browser__run(struct hist_browser *browser, const char *help, + bool warn_lost_event); void hist_browser__init(struct hist_browser *browser, struct hists *hists); #endif /* _PERF_UI_BROWSER_HISTS_H_ */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f6630cb95eff..02721b579746 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env); + struct perf_env *env, + bool warn_lost_event); int script_browse(const char *script_opt); #else static inline @@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, const char *help __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, - struct perf_env *env __maybe_unused) + struct perf_env *env __maybe_unused, + bool warn_lost_event __maybe_unused) { return 0; } -- cgit v1.2.3 From a1ff5b05e988ca3620027148cd61013408ea4194 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:30 -0800 Subject: perf top: Remove lost events checking There would be some records lost in overwrite mode because of pausing the ringbuffer. It has little impact for the accuracy of the snapshot and could be tolerated by 'perf top'. Remove the lost events checking. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-16-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7def861a9ec4..59653062bb48 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top) printf("%-*.*s\n", win_width, win_width, graph_dotted_line); - if (hists->stats.nr_lost_warned != - hists->stats.nr_events[PERF_RECORD_LOST]) { + if (!top->record_opts.overwrite && + (hists->stats.nr_lost_warned != + hists->stats.nr_events[PERF_RECORD_LOST])) { hists->stats.nr_lost_warned = hists->stats.nr_events[PERF_RECORD_LOST]; color_fprintf(stdout, PERF_COLOR_RED, @@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg) perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, true); + &top->session->header.env, + !top->record_opts.overwrite); done = 1; return NULL; -- cgit v1.2.3 From ebebbf082357f86cc84a4d46ce897a5750e41b7a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:31 -0800 Subject: perf top: Switch default mode to overwrite mode perf_top__mmap_read() has a severe performance issue in the Knights Landing/Mill platform, when monitoring heavy load systems. It costs several minutes to finish, which is unacceptable. Currently, 'perf top' uses the non overwrite mode. For non overwrite mode, it tries to read everything in the ringbuffer and doesn't pause it. Once there are lots of samples delivered persistently, the processing time could be very long. Also, the latest samples could be lost when the ringbuffer is full. For overwrite mode, it takes a snapshot for the system by pausing the ringbuffer, which could significantly reduce the processing time. Also, the overwrite mode always keep the latest samples. Considering the real time requirement for 'perf top', the overwrite mode is more suitable for it. Actually, 'perf top' was overwrite mode. It is changed to non overwrite mode since commit 93fc64f14472 ("perf top: Switch to non overwrite mode"). It's better to change it back to overwrite mode by default. For the kernel which doesn't support overwrite mode, it will fall back to non overwrite mode. There would be some records lost in overwrite mode because of pausing the ringbuffer. It has little impact for the accuracy of the snapshot and can be tolerated. For overwrite mode, unconditionally wait 100 ms before each snapshot. It also reduces the overhead caused by pausing ringbuffer, especially on light load system. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-17-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 59653062bb48..2b4914f34ed6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -809,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool, static void perf_top__mmap_read_idx(struct perf_top *top, int idx) { + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; struct perf_sample sample; struct perf_evsel *evsel; + struct perf_mmap *md; struct perf_session *session = top->session; union perf_event *event; struct machine *machine; + u64 end, start; int ret; - while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { - ret = perf_evlist__parse_sample(top->evlist, event, &sample); + md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; + if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0) + return; + + while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) { + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); goto next_event; @@ -871,16 +879,28 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) } else ++session->evlist->stats.nr_unknown_events; next_event: - perf_evlist__mmap_consume(top->evlist, idx); + perf_mmap__consume(md, opts->overwrite); } + + perf_mmap__read_done(md); } static void perf_top__mmap_read(struct perf_top *top) { + bool overwrite = top->record_opts.overwrite; + struct perf_evlist *evlist = top->evlist; int i; + if (overwrite) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); + for (i = 0; i < top->evlist->nr_mmaps; i++) perf_top__mmap_read_idx(top, i); + + if (overwrite) { + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + } } /* @@ -979,11 +999,6 @@ static int perf_top__start_counters(struct perf_top *top) goto out_err; } - if (opts->overwrite) { - ui__error("not support overwrite mode yet\n"); - goto out_err; - } - perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, counter) { @@ -1144,7 +1159,7 @@ static int __cmd_top(struct perf_top *top) perf_top__mmap_read(top); - if (hits == top->samples) + if (opts->overwrite || (hits == top->samples)) ret = perf_evlist__poll(top->evlist, 100); if (resize) { @@ -1238,6 +1253,7 @@ int cmd_top(int argc, const char **argv) .uses_mmap = true, }, .proc_map_timeout = 500, + .overwrite = 1, }, .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, -- cgit v1.2.3 From 8cc42de736b617827a4e7664fb8d7a325bc125bc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 18 Jan 2018 13:26:32 -0800 Subject: perf top: Check the latency of perf_top__mmap_read() The latency of perf_top__mmap_read() should be lower than refresh time. If not, give some hints to reduce the latency. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1516310792-208685-18-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2b4914f34ed6..b7c823ba8374 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -889,8 +889,10 @@ static void perf_top__mmap_read(struct perf_top *top) { bool overwrite = top->record_opts.overwrite; struct perf_evlist *evlist = top->evlist; + unsigned long long start, end; int i; + start = rdclock(); if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); @@ -901,6 +903,13 @@ static void perf_top__mmap_read(struct perf_top *top) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + end = rdclock(); + + if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC) + ui__warning("Too slow to read ring buffer.\n" + "Please try increasing the period (-c) or\n" + "decreasing the freq (-F) or\n" + "limiting the number of CPUs (-C)\n"); } /* -- cgit v1.2.3 From 6677d26c8befa462eab9be6c5335a939011e7e65 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 29 Jan 2018 15:03:59 +0200 Subject: perf tools: Substitute yet another strtoull() Instead of home grown function let's use what library provides us. Signed-off-by: Andriy Shevchenko Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180129130359.1490-1-andriy.shevchenko@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 443892dabedb..1019bbc5dbd8 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -340,35 +340,15 @@ size_t hex_width(u64 v) return n; } -static int hex(char ch) -{ - if ((ch >= '0') && (ch <= '9')) - return ch - '0'; - if ((ch >= 'a') && (ch <= 'f')) - return ch - 'a' + 10; - if ((ch >= 'A') && (ch <= 'F')) - return ch - 'A' + 10; - return -1; -} - /* * While we find nice hex chars, build a long_val. * Return number of chars processed. */ int hex2u64(const char *ptr, u64 *long_val) { - const char *p = ptr; - *long_val = 0; - - while (*p) { - const int hex_val = hex(*p); + char *p; - if (hex_val < 0) - break; - - *long_val = (*long_val << 4) | hex_val; - p++; - } + *long_val = strtoull(ptr, &p, 16); return p - ptr; } -- cgit v1.2.3 From ba7e851642f48002def3450b279598c187721fd0 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Mon, 5 Feb 2018 20:48:35 +0900 Subject: perf data: Document missing --force option Add the --force option to the man page. Signed-off-by: Sangwon Hong Cc: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Link: http://lkml.kernel.org/r/1517831315-31490-1-git-send-email-qpakzk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-data.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index f0796a47dfa3..90bb4aabe4f8 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -30,6 +30,10 @@ OPTIONS for 'convert' -i:: Specify input perf data file path. +-f:: +--force:: + Don't complain, do it. + -v:: --verbose:: Be more verbose (show counter open errors, etc). -- cgit v1.2.3 From 7a92453620d42c3a5fea94a864dc6aa04c262b93 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 17 Jan 2018 09:38:31 +0100 Subject: perf test: Fix test trace+probe_libc_inet_pton.sh for s390x On Intel test case trace+probe_libc_inet_pton.sh succeeds and the output is: [root@f27 perf]# ./perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.037 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.037/0.037/0.037/0.000 ms 0.000 probe_libc:inet_pton:(7fa40ac618a0)) __GI___inet_pton (/usr/lib64/libc-2.26.so) getaddrinfo (/usr/lib64/libc-2.26.so) main (/usr/bin/ping) The kernel stack unwinder is used, it is specified implicitly as call-graph=fp (frame pointer). On s390x only dwarf is available for stack unwinding. It is also done in user space. This requires different parameter setup and result checking for s390x and Intel. This patch adds separate perf trace setup and result checking for Intel and s390x. On s390x specify this command line to get a call-graph and handle the different call graph result checking: [root@s35lp76 perf]# ./perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.041 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.041/0.041/0.041/0.000 ms 0.000 probe_libc:inet_pton:(3ffb9942060)) __GI___inet_pton (/usr/lib64/libc-2.26.so) gaih_inet (inlined) __GI_getaddrinfo (inlined) main (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) _start (/usr/bin/ping) [root@s35lp76 perf]# Before: [root@s8360047 perf]# ./perf test -vv 58 58: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 26349 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.079 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.079/0.079/0.079/0.000 ms 0.000 probe_libc:inet_pton:(3ff925c2060)) test child finished with -1 ---- end ---- probe libc's inet_pton & backtrace it with ping: FAILED! [root@s8360047 perf]# After: [root@s35lp76 perf]# ./perf test -vv 57 57: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 38708 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.038 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.038/0.038/0.038/0.000 ms 0.000 probe_libc:inet_pton:(3ff87342060)) __GI___inet_pton (/usr/lib64/libc-2.26.so) gaih_inet (inlined) __GI_getaddrinfo (inlined) main (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) _start (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok [root@s35lp76 perf]# On Intel the test case runs unchanged and succeeds. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180117083831.101001-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/tests/shell/trace+probe_libc_inet_pton.sh | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 8b3da21a08f1..c446c894b297 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() { expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" expected[6]=".*inet_pton[[:space:]]\($libc\)$" - expected[7]="getaddrinfo[[:space:]]\($libc\)$" - expected[8]=".*\(.*/bin/ping.*\)$" - - perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do + case "$(uname -m)" in + s390x) + eventattr='call-graph=dwarf' + expected[7]="gaih_inet[[:space:]]\(inlined\)$" + expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$" + expected[9]="main[[:space:]]\(.*/bin/ping.*\)$" + expected[10]="__libc_start_main[[:space:]]\($libc\)$" + expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$" + ;; + *) + eventattr='max-stack=3' + expected[7]="getaddrinfo[[:space:]]\($libc\)$" + expected[8]=".*\(.*/bin/ping.*\)$" + ;; + esac + + perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do echo $line echo "$line" | egrep -q "${expected[$idx]}" if [ $? -ne 0 ] ; then @@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() { exit 1 fi let idx+=1 - [ $idx -eq 9 ] && break + [ -z "${expected[$idx]}" ] && break done } -- cgit v1.2.3 From f091f1d6a2b4840c9b631d6138f5354401347863 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 12:54:58 +0100 Subject: tools/headers: Synchronize kernel ABI headers, v4.16-rc1 Sync the following tooling headers with the latest kernel version: tools/arch/powerpc/include/uapi/asm/kvm.h tools/arch/x86/include/asm/cpufeatures.h tools/include/uapi/drm/i915_drm.h tools/include/uapi/linux/if_link.h tools/include/uapi/linux/kvm.h All the changes are new ABI additions which don't impact their use in existing tooling. Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Namhyung Kim Cc: Jiri Olsa Cc: Stephen Rothwell Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/arch/powerpc/include/uapi/asm/kvm.h | 2 + tools/arch/x86/include/asm/cpufeatures.h | 1 + tools/include/uapi/drm/i915_drm.h | 77 ++++++++++++++++++++++++++ tools/include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/kvm.h | 90 +++++++++++++++++++++++++++++++ 5 files changed, 171 insertions(+) (limited to 'tools') diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 637b7263cb86..833ed9a16adf 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) +#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 1d9199e1c2ad..0dfe4d3f74e2 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -210,6 +210,7 @@ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ +#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index ac3c6503ca27..536ee4febd74 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -86,6 +86,62 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. + */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) + +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use @@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + typedef struct drm_i915_getparam { __s32 param; /* diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8616131e2c61..6d9447700e18 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -163,6 +163,7 @@ enum { IFLA_IF_NETNSID, IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, + IFLA_NEW_IFINDEX, __IFLA_MAX }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 8fb90a0819c3..0fb5ef939732 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_CMMA_MIGRATION */ #define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) +/* Memory Encryption Commands */ +#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long) + +struct kvm_enc_region { + __u64 addr; + __u64 size; +}; + +#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) +#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; +}; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3 From baa676103037e0dd145bb905eb51bc0b2f48fd49 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 8 Feb 2018 12:47:49 +0100 Subject: perf s390: Grab a copy of arch/s390/kernel/syscall/syscall.tbl Grab a copy of the s390 system call table file introduced with commit 857f46bfb07f53dc112d69bdfb137cc5ec3da7c5 "s390/syscalls: add system call table". Signed-off-by: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Thomas Richter Cc: linux-s390@vger.kernel.org LPU-Reference: 1518090470-2899-3-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-hpw7vdjp7g92ivgpddrp5ydq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 390 ++++++++++++++++++++++++ 1 file changed, 390 insertions(+) create mode 100644 tools/perf/arch/s390/entry/syscalls/syscall.tbl (limited to 'tools') diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl new file mode 100644 index 000000000000..b38d48464368 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -0,0 +1,390 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# System call table for s390 +# +# Format: +# +# +# +# where can be common, 64, or 32 + +1 common exit sys_exit sys_exit +2 common fork sys_fork sys_fork +3 common read sys_read compat_sys_s390_read +4 common write sys_write compat_sys_s390_write +5 common open sys_open compat_sys_open +6 common close sys_close sys_close +7 common restart_syscall sys_restart_syscall sys_restart_syscall +8 common creat sys_creat compat_sys_creat +9 common link sys_link compat_sys_link +10 common unlink sys_unlink compat_sys_unlink +11 common execve sys_execve compat_sys_execve +12 common chdir sys_chdir compat_sys_chdir +13 32 time - compat_sys_time +14 common mknod sys_mknod compat_sys_mknod +15 common chmod sys_chmod compat_sys_chmod +16 32 lchown - compat_sys_s390_lchown16 +19 common lseek sys_lseek compat_sys_lseek +20 common getpid sys_getpid sys_getpid +21 common mount sys_mount compat_sys_mount +22 common umount sys_oldumount compat_sys_oldumount +23 32 setuid - compat_sys_s390_setuid16 +24 32 getuid - compat_sys_s390_getuid16 +25 32 stime - compat_sys_stime +26 common ptrace sys_ptrace compat_sys_ptrace +27 common alarm sys_alarm sys_alarm +29 common pause sys_pause sys_pause +30 common utime sys_utime compat_sys_utime +33 common access sys_access compat_sys_access +34 common nice sys_nice sys_nice +36 common sync sys_sync sys_sync +37 common kill sys_kill sys_kill +38 common rename sys_rename compat_sys_rename +39 common mkdir sys_mkdir compat_sys_mkdir +40 common rmdir sys_rmdir compat_sys_rmdir +41 common dup sys_dup sys_dup +42 common pipe sys_pipe compat_sys_pipe +43 common times sys_times compat_sys_times +45 common brk sys_brk compat_sys_brk +46 32 setgid - compat_sys_s390_setgid16 +47 32 getgid - compat_sys_s390_getgid16 +48 common signal sys_signal compat_sys_signal +49 32 geteuid - compat_sys_s390_geteuid16 +50 32 getegid - compat_sys_s390_getegid16 +51 common acct sys_acct compat_sys_acct +52 common umount2 sys_umount compat_sys_umount +54 common ioctl sys_ioctl compat_sys_ioctl +55 common fcntl sys_fcntl compat_sys_fcntl +57 common setpgid sys_setpgid sys_setpgid +60 common umask sys_umask sys_umask +61 common chroot sys_chroot compat_sys_chroot +62 common ustat sys_ustat compat_sys_ustat +63 common dup2 sys_dup2 sys_dup2 +64 common getppid sys_getppid sys_getppid +65 common getpgrp sys_getpgrp sys_getpgrp +66 common setsid sys_setsid sys_setsid +67 common sigaction sys_sigaction compat_sys_sigaction +70 32 setreuid - compat_sys_s390_setreuid16 +71 32 setregid - compat_sys_s390_setregid16 +72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +73 common sigpending sys_sigpending compat_sys_sigpending +74 common sethostname sys_sethostname compat_sys_sethostname +75 common setrlimit sys_setrlimit compat_sys_setrlimit +76 32 getrlimit - compat_sys_old_getrlimit +77 common getrusage sys_getrusage compat_sys_getrusage +78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday +79 common settimeofday sys_settimeofday compat_sys_settimeofday +80 32 getgroups - compat_sys_s390_getgroups16 +81 32 setgroups - compat_sys_s390_setgroups16 +83 common symlink sys_symlink compat_sys_symlink +85 common readlink sys_readlink compat_sys_readlink +86 common uselib sys_uselib compat_sys_uselib +87 common swapon sys_swapon compat_sys_swapon +88 common reboot sys_reboot compat_sys_reboot +89 common readdir - compat_sys_old_readdir +90 common mmap sys_old_mmap compat_sys_s390_old_mmap +91 common munmap sys_munmap compat_sys_munmap +92 common truncate sys_truncate compat_sys_truncate +93 common ftruncate sys_ftruncate compat_sys_ftruncate +94 common fchmod sys_fchmod sys_fchmod +95 32 fchown - compat_sys_s390_fchown16 +96 common getpriority sys_getpriority sys_getpriority +97 common setpriority sys_setpriority sys_setpriority +99 common statfs sys_statfs compat_sys_statfs +100 common fstatfs sys_fstatfs compat_sys_fstatfs +101 32 ioperm - - +102 common socketcall sys_socketcall compat_sys_socketcall +103 common syslog sys_syslog compat_sys_syslog +104 common setitimer sys_setitimer compat_sys_setitimer +105 common getitimer sys_getitimer compat_sys_getitimer +106 common stat sys_newstat compat_sys_newstat +107 common lstat sys_newlstat compat_sys_newlstat +108 common fstat sys_newfstat compat_sys_newfstat +110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +111 common vhangup sys_vhangup sys_vhangup +112 common idle - - +114 common wait4 sys_wait4 compat_sys_wait4 +115 common swapoff sys_swapoff compat_sys_swapoff +116 common sysinfo sys_sysinfo compat_sys_sysinfo +117 common ipc sys_s390_ipc compat_sys_s390_ipc +118 common fsync sys_fsync sys_fsync +119 common sigreturn sys_sigreturn compat_sys_sigreturn +120 common clone sys_clone compat_sys_clone +121 common setdomainname sys_setdomainname compat_sys_setdomainname +122 common uname sys_newuname compat_sys_newuname +124 common adjtimex sys_adjtimex compat_sys_adjtimex +125 common mprotect sys_mprotect compat_sys_mprotect +126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask +127 common create_module - - +128 common init_module sys_init_module compat_sys_init_module +129 common delete_module sys_delete_module compat_sys_delete_module +130 common get_kernel_syms - - +131 common quotactl sys_quotactl compat_sys_quotactl +132 common getpgid sys_getpgid sys_getpgid +133 common fchdir sys_fchdir sys_fchdir +134 common bdflush sys_bdflush compat_sys_bdflush +135 common sysfs sys_sysfs compat_sys_sysfs +136 common personality sys_s390_personality sys_s390_personality +137 common afs_syscall - - +138 32 setfsuid - compat_sys_s390_setfsuid16 +139 32 setfsgid - compat_sys_s390_setfsgid16 +140 32 _llseek - compat_sys_llseek +141 common getdents sys_getdents compat_sys_getdents +142 32 _newselect - compat_sys_select +142 64 select sys_select - +143 common flock sys_flock sys_flock +144 common msync sys_msync compat_sys_msync +145 common readv sys_readv compat_sys_readv +146 common writev sys_writev compat_sys_writev +147 common getsid sys_getsid sys_getsid +148 common fdatasync sys_fdatasync sys_fdatasync +149 common _sysctl sys_sysctl compat_sys_sysctl +150 common mlock sys_mlock compat_sys_mlock +151 common munlock sys_munlock compat_sys_munlock +152 common mlockall sys_mlockall sys_mlockall +153 common munlockall sys_munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam +155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep compat_sys_nanosleep +163 common mremap sys_mremap compat_sys_mremap +164 32 setresuid - compat_sys_s390_setresuid16 +165 32 getresuid - compat_sys_s390_getresuid16 +167 common query_module - - +168 common poll sys_poll compat_sys_poll +169 common nfsservctl - - +170 32 setresgid - compat_sys_s390_setresgid16 +171 32 getresgid - compat_sys_s390_getresgid16 +172 common prctl sys_prctl compat_sys_prctl +173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend +180 common pread64 sys_pread64 compat_sys_s390_pread64 +181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 +182 32 chown - compat_sys_s390_chown16 +183 common getcwd sys_getcwd compat_sys_getcwd +184 common capget sys_capget compat_sys_capget +185 common capset sys_capset compat_sys_capset +186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack +187 common sendfile sys_sendfile64 compat_sys_sendfile +188 common getpmsg - - +189 common putpmsg - - +190 common vfork sys_vfork sys_vfork +191 32 ugetrlimit - compat_sys_getrlimit +191 64 getrlimit sys_getrlimit - +192 32 mmap2 - compat_sys_s390_mmap2 +193 32 truncate64 - compat_sys_s390_truncate64 +194 32 ftruncate64 - compat_sys_s390_ftruncate64 +195 32 stat64 - compat_sys_s390_stat64 +196 32 lstat64 - compat_sys_s390_lstat64 +197 32 fstat64 - compat_sys_s390_fstat64 +198 32 lchown32 - compat_sys_lchown +198 64 lchown sys_lchown - +199 32 getuid32 - sys_getuid +199 64 getuid sys_getuid - +200 32 getgid32 - sys_getgid +200 64 getgid sys_getgid - +201 32 geteuid32 - sys_geteuid +201 64 geteuid sys_geteuid - +202 32 getegid32 - sys_getegid +202 64 getegid sys_getegid - +203 32 setreuid32 - sys_setreuid +203 64 setreuid sys_setreuid - +204 32 setregid32 - sys_setregid +204 64 setregid sys_setregid - +205 32 getgroups32 - compat_sys_getgroups +205 64 getgroups sys_getgroups - +206 32 setgroups32 - compat_sys_setgroups +206 64 setgroups sys_setgroups - +207 32 fchown32 - sys_fchown +207 64 fchown sys_fchown - +208 32 setresuid32 - sys_setresuid +208 64 setresuid sys_setresuid - +209 32 getresuid32 - compat_sys_getresuid +209 64 getresuid sys_getresuid - +210 32 setresgid32 - sys_setresgid +210 64 setresgid sys_setresgid - +211 32 getresgid32 - compat_sys_getresgid +211 64 getresgid sys_getresgid - +212 32 chown32 - compat_sys_chown +212 64 chown sys_chown - +213 32 setuid32 - sys_setuid +213 64 setuid sys_setuid - +214 32 setgid32 - sys_setgid +214 64 setgid sys_setgid - +215 32 setfsuid32 - sys_setfsuid +215 64 setfsuid sys_setfsuid - +216 32 setfsgid32 - sys_setfsgid +216 64 setfsgid sys_setfsgid - +217 common pivot_root sys_pivot_root compat_sys_pivot_root +218 common mincore sys_mincore compat_sys_mincore +219 common madvise sys_madvise compat_sys_madvise +220 common getdents64 sys_getdents64 compat_sys_getdents64 +221 32 fcntl64 - compat_sys_fcntl64 +222 common readahead sys_readahead compat_sys_s390_readahead +223 32 sendfile64 - compat_sys_sendfile64 +224 common setxattr sys_setxattr compat_sys_setxattr +225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr +226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr +227 common getxattr sys_getxattr compat_sys_getxattr +228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr +229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr +230 common listxattr sys_listxattr compat_sys_listxattr +231 common llistxattr sys_llistxattr compat_sys_llistxattr +232 common flistxattr sys_flistxattr compat_sys_flistxattr +233 common removexattr sys_removexattr compat_sys_removexattr +234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr +235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +236 common gettid sys_gettid sys_gettid +237 common tkill sys_tkill sys_tkill +238 common futex sys_futex compat_sys_futex +239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity +241 common tgkill sys_tgkill sys_tgkill +243 common io_setup sys_io_setup compat_sys_io_setup +244 common io_destroy sys_io_destroy compat_sys_io_destroy +245 common io_getevents sys_io_getevents compat_sys_io_getevents +246 common io_submit sys_io_submit compat_sys_io_submit +247 common io_cancel sys_io_cancel compat_sys_io_cancel +248 common exit_group sys_exit_group sys_exit_group +249 common epoll_create sys_epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl +251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait +252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 +254 common timer_create sys_timer_create compat_sys_timer_create +255 common timer_settime sys_timer_settime compat_sys_timer_settime +256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime compat_sys_clock_settime +260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime +261 common clock_getres sys_clock_getres compat_sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 +265 common statfs64 sys_statfs64 compat_sys_statfs64 +266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +268 common mbind sys_mbind compat_sys_mbind +269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +271 common mq_open sys_mq_open compat_sys_mq_open +272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +275 common mq_notify sys_mq_notify compat_sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr +277 common kexec_load sys_kexec_load compat_sys_kexec_load +278 common add_key sys_add_key compat_sys_add_key +279 common request_key sys_request_key compat_sys_request_key +280 common keyctl sys_keyctl compat_sys_keyctl +281 common waitid sys_waitid compat_sys_waitid +282 common ioprio_set sys_ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +288 common openat sys_openat compat_sys_openat +289 common mkdirat sys_mkdirat compat_sys_mkdirat +290 common mknodat sys_mknodat compat_sys_mknodat +291 common fchownat sys_fchownat compat_sys_fchownat +292 common futimesat sys_futimesat compat_sys_futimesat +293 32 fstatat64 - compat_sys_s390_fstatat64 +293 64 newfstatat sys_newfstatat - +294 common unlinkat sys_unlinkat compat_sys_unlinkat +295 common renameat sys_renameat compat_sys_renameat +296 common linkat sys_linkat compat_sys_linkat +297 common symlinkat sys_symlinkat compat_sys_symlinkat +298 common readlinkat sys_readlinkat compat_sys_readlinkat +299 common fchmodat sys_fchmodat compat_sys_fchmodat +300 common faccessat sys_faccessat compat_sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6 +302 common ppoll sys_ppoll compat_sys_ppoll +303 common unshare sys_unshare compat_sys_unshare +304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list +305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list +306 common splice sys_splice compat_sys_splice +307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range +308 common tee sys_tee compat_sys_tee +309 common vmsplice sys_vmsplice compat_sys_vmsplice +310 common move_pages sys_move_pages compat_sys_move_pages +311 common getcpu sys_getcpu compat_sys_getcpu +312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait +313 common utimes sys_utimes compat_sys_utimes +314 common fallocate sys_fallocate compat_sys_s390_fallocate +315 common utimensat sys_utimensat compat_sys_utimensat +316 common signalfd sys_signalfd compat_sys_signalfd +317 common timerfd - - +318 common eventfd sys_eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 +323 common eventfd2 sys_eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 compat_sys_pipe2 +326 common dup3 sys_dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv compat_sys_preadv +329 common pwritev sys_pwritev compat_sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +332 common fanotify_init sys_fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark +334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +338 common syncfs sys_syncfs sys_syncfs +339 common setns sys_setns sys_setns +340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp compat_sys_kcmp +344 common finit_module sys_finit_module compat_sys_finit_module +345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr +346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr +347 common renameat2 sys_renameat2 compat_sys_renameat2 +348 common seccomp sys_seccomp compat_sys_seccomp +349 common getrandom sys_getrandom compat_sys_getrandom +350 common memfd_create sys_memfd_create compat_sys_memfd_create +351 common bpf sys_bpf compat_sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +354 common execveat sys_execveat compat_sys_execveat +355 common userfaultfd sys_userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg +359 common socket sys_socket sys_socket +360 common socketpair sys_socketpair compat_sys_socketpair +361 common bind sys_bind compat_sys_bind +362 common connect sys_connect compat_sys_connect +363 common listen sys_listen sys_listen +364 common accept4 sys_accept4 compat_sys_accept4 +365 common getsockopt sys_getsockopt compat_sys_getsockopt +366 common setsockopt sys_setsockopt compat_sys_setsockopt +367 common getsockname sys_getsockname compat_sys_getsockname +368 common getpeername sys_getpeername compat_sys_getpeername +369 common sendto sys_sendto compat_sys_sendto +370 common sendmsg sys_sendmsg compat_sys_sendmsg +371 common recvfrom sys_recvfrom compat_sys_recvfrom +372 common recvmsg sys_recvmsg compat_sys_recvmsg +373 common shutdown sys_shutdown sys_shutdown +374 common mlock2 sys_mlock2 compat_sys_mlock2 +375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +376 common preadv2 sys_preadv2 compat_sys_preadv2 +377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage +379 common statx sys_statx compat_sys_statx +380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi -- cgit v1.2.3 From 690d22d9d4423b4522fb44a71145403eef2df834 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 8 Feb 2018 12:47:50 +0100 Subject: perf s390: Rework system call table creation by using syscall.tbl Recently, s390 uses a syscall.tbl input file to generate its system call table and unistd uapi header files. Hence, update mksyscalltbl to use it as input to create the system table for perf. Signed-off-by: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Thomas Richter Cc: linux-s390@vger.kernel.org LPU-Reference: 1518090470-2899-4-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-bdyhllhsq1zgxv2qx4m377y6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 10 +++++++--- tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 18 +++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 48228de415d0..dfa6e3103437 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 out := $(OUTPUT)arch/s390/include/generated/asm header := $(out)/syscalls_64.c -sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h -sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls +sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + (diff -B $(sysdef) $(syskrn) >/dev/null) \ + || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ clean:: $(call QUIET_CLEAN, s390) $(RM) $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl index 7fa0d0abd419..72ecbb676370 100755 --- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -3,25 +3,23 @@ # # Generate system call table for perf # -# -# Copyright IBM Corp. 2017 +# Copyright IBM Corp. 2017, 2018 # Author(s): Hendrik Brueckner # -gcc=$1 -input=$2 +SYSCALL_TBL=$1 -if ! test -r $input; then +if ! test -r $SYSCALL_TBL; then echo "Could not read input file" >&2 exit 1 fi create_table() { - local max_nr + local max_nr nr abi sc discard echo 'static const char *syscalltbl_s390_64[] = {' - while read sc nr; do + while read nr abi sc discard; do printf '\t[%d] = "%s",\n' $nr $sc max_nr=$nr done @@ -29,8 +27,6 @@ create_table() echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" } - -$gcc -m64 -E -dM -x c $input \ - |sed -ne 's/^#define __NR_//p' \ - |sort -t' ' -k2 -nu \ +grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL \ + |sort -k1 -n \ |create_table -- cgit v1.2.3 From f1d0b4cde922863004ce3f5f39e8662cc0686c96 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 8 Feb 2018 12:47:48 +0100 Subject: Revert "tools include s390: Grab a copy of arch/s390/include/uapi/asm/unistd.h" This reverts commit f120c7b187e6c418238710b48723ce141f467543 which is no longer required with the introduction of a syscall.tbl on s390. Signed-off-by: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Thomas Richter Cc: linux-s390@vger.kernel.org LPU-Reference: 1518090470-2899-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-q1lg0nvhha1tk39ri9aqalcb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/unistd.h | 412 ------------------------------ tools/perf/check-headers.sh | 1 - 2 files changed, 413 deletions(-) delete mode 100644 tools/arch/s390/include/uapi/asm/unistd.h (limited to 'tools') diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h deleted file mode 100644 index 725120939051..000000000000 --- a/tools/arch/s390/include/uapi/asm/unistd.h +++ /dev/null @@ -1,412 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * - * Derived from "include/asm-i386/unistd.h" - */ - -#ifndef _UAPI_ASM_S390_UNISTD_H_ -#define _UAPI_ASM_S390_UNISTD_H_ - -/* - * This file contains the system call numbers. - */ - -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_restart_syscall 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_brk 45 -#define __NR_signal 48 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_setpgid 57 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_symlink 83 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_lookup_dcookie 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_getdents 141 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 -#define __NR_putpmsg 189 -#define __NR_vfork 190 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_readahead 222 -#define __NR_setxattr 224 -#define __NR_lsetxattr 225 -#define __NR_fsetxattr 226 -#define __NR_getxattr 227 -#define __NR_lgetxattr 228 -#define __NR_fgetxattr 229 -#define __NR_listxattr 230 -#define __NR_llistxattr 231 -#define __NR_flistxattr 232 -#define __NR_removexattr 233 -#define __NR_lremovexattr 234 -#define __NR_fremovexattr 235 -#define __NR_gettid 236 -#define __NR_tkill 237 -#define __NR_futex 238 -#define __NR_sched_setaffinity 239 -#define __NR_sched_getaffinity 240 -#define __NR_tgkill 241 -/* Number 242 is reserved for tux */ -#define __NR_io_setup 243 -#define __NR_io_destroy 244 -#define __NR_io_getevents 245 -#define __NR_io_submit 246 -#define __NR_io_cancel 247 -#define __NR_exit_group 248 -#define __NR_epoll_create 249 -#define __NR_epoll_ctl 250 -#define __NR_epoll_wait 251 -#define __NR_set_tid_address 252 -#define __NR_fadvise64 253 -#define __NR_timer_create 254 -#define __NR_timer_settime 255 -#define __NR_timer_gettime 256 -#define __NR_timer_getoverrun 257 -#define __NR_timer_delete 258 -#define __NR_clock_settime 259 -#define __NR_clock_gettime 260 -#define __NR_clock_getres 261 -#define __NR_clock_nanosleep 262 -/* Number 263 is reserved for vserver */ -#define __NR_statfs64 265 -#define __NR_fstatfs64 266 -#define __NR_remap_file_pages 267 -#define __NR_mbind 268 -#define __NR_get_mempolicy 269 -#define __NR_set_mempolicy 270 -#define __NR_mq_open 271 -#define __NR_mq_unlink 272 -#define __NR_mq_timedsend 273 -#define __NR_mq_timedreceive 274 -#define __NR_mq_notify 275 -#define __NR_mq_getsetattr 276 -#define __NR_kexec_load 277 -#define __NR_add_key 278 -#define __NR_request_key 279 -#define __NR_keyctl 280 -#define __NR_waitid 281 -#define __NR_ioprio_set 282 -#define __NR_ioprio_get 283 -#define __NR_inotify_init 284 -#define __NR_inotify_add_watch 285 -#define __NR_inotify_rm_watch 286 -#define __NR_migrate_pages 287 -#define __NR_openat 288 -#define __NR_mkdirat 289 -#define __NR_mknodat 290 -#define __NR_fchownat 291 -#define __NR_futimesat 292 -#define __NR_unlinkat 294 -#define __NR_renameat 295 -#define __NR_linkat 296 -#define __NR_symlinkat 297 -#define __NR_readlinkat 298 -#define __NR_fchmodat 299 -#define __NR_faccessat 300 -#define __NR_pselect6 301 -#define __NR_ppoll 302 -#define __NR_unshare 303 -#define __NR_set_robust_list 304 -#define __NR_get_robust_list 305 -#define __NR_splice 306 -#define __NR_sync_file_range 307 -#define __NR_tee 308 -#define __NR_vmsplice 309 -#define __NR_move_pages 310 -#define __NR_getcpu 311 -#define __NR_epoll_pwait 312 -#define __NR_utimes 313 -#define __NR_fallocate 314 -#define __NR_utimensat 315 -#define __NR_signalfd 316 -#define __NR_timerfd 317 -#define __NR_eventfd 318 -#define __NR_timerfd_create 319 -#define __NR_timerfd_settime 320 -#define __NR_timerfd_gettime 321 -#define __NR_signalfd4 322 -#define __NR_eventfd2 323 -#define __NR_inotify_init1 324 -#define __NR_pipe2 325 -#define __NR_dup3 326 -#define __NR_epoll_create1 327 -#define __NR_preadv 328 -#define __NR_pwritev 329 -#define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_event_open 331 -#define __NR_fanotify_init 332 -#define __NR_fanotify_mark 333 -#define __NR_prlimit64 334 -#define __NR_name_to_handle_at 335 -#define __NR_open_by_handle_at 336 -#define __NR_clock_adjtime 337 -#define __NR_syncfs 338 -#define __NR_setns 339 -#define __NR_process_vm_readv 340 -#define __NR_process_vm_writev 341 -#define __NR_s390_runtime_instr 342 -#define __NR_kcmp 343 -#define __NR_finit_module 344 -#define __NR_sched_setattr 345 -#define __NR_sched_getattr 346 -#define __NR_renameat2 347 -#define __NR_seccomp 348 -#define __NR_getrandom 349 -#define __NR_memfd_create 350 -#define __NR_bpf 351 -#define __NR_s390_pci_mmio_write 352 -#define __NR_s390_pci_mmio_read 353 -#define __NR_execveat 354 -#define __NR_userfaultfd 355 -#define __NR_membarrier 356 -#define __NR_recvmmsg 357 -#define __NR_sendmmsg 358 -#define __NR_socket 359 -#define __NR_socketpair 360 -#define __NR_bind 361 -#define __NR_connect 362 -#define __NR_listen 363 -#define __NR_accept4 364 -#define __NR_getsockopt 365 -#define __NR_setsockopt 366 -#define __NR_getsockname 367 -#define __NR_getpeername 368 -#define __NR_sendto 369 -#define __NR_sendmsg 370 -#define __NR_recvfrom 371 -#define __NR_recvmsg 372 -#define __NR_shutdown 373 -#define __NR_mlock2 374 -#define __NR_copy_file_range 375 -#define __NR_preadv2 376 -#define __NR_pwritev2 377 -#define __NR_s390_guarded_storage 378 -#define __NR_statx 379 -#define __NR_s390_sthyi 380 -#define NR_syscalls 381 - -/* - * There are some system calls that are not present on 64 bit, some - * have a different name although they do the same (e.g. __NR_chown32 - * is __NR_chown on 64 bit). - */ -#ifndef __s390x__ - -#define __NR_time 13 -#define __NR_lchown 16 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_getrlimit 76 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_fchown 95 -#define __NR_ioperm 101 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR__newselect 142 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_chown 182 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_fcntl64 221 -#define __NR_sendfile64 223 -#define __NR_fadvise64_64 264 -#define __NR_fstatat64 293 - -#else - -#define __NR_select 142 -#define __NR_getrlimit 191 /* SuS compliant getrlimit */ -#define __NR_lchown 198 -#define __NR_getuid 199 -#define __NR_getgid 200 -#define __NR_geteuid 201 -#define __NR_getegid 202 -#define __NR_setreuid 203 -#define __NR_setregid 204 -#define __NR_getgroups 205 -#define __NR_setgroups 206 -#define __NR_fchown 207 -#define __NR_setresuid 208 -#define __NR_getresuid 209 -#define __NR_setresgid 210 -#define __NR_getresgid 211 -#define __NR_chown 212 -#define __NR_setuid 213 -#define __NR_setgid 214 -#define __NR_setfsuid 215 -#define __NR_setfsgid 216 -#define __NR_newfstatat 293 - -#endif - -#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 51abdb0a4047..790ec25919a0 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h -arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h arch/alpha/include/uapi/asm/errno.h -- cgit v1.2.3