From e722a295cf493388dae474745d30e91e1a2ec549 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 27 Aug 2020 14:36:27 +0200
Subject: staging: ion: remove from the tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ION android code has long been marked to be removed, now that we
dma-buf support merged into the real part of the kernel.

It was thought that we could wait to remove the ion kernel at a later
time, but as the out-of-tree Android fork of the ion code has diverged
quite a bit, and any Android device using the ion interface uses that
forked version and not this in-tree version, the in-tree copy of the
code is abandonded and not used by anyone.

Combine this abandoned codebase with the need to make changes to it in
order to keep the kernel building properly, which then causes merge
issues when merging those changes into the out-of-tree Android code, and
you end up with two different groups of people (the in-kernel-tree
developers, and the Android kernel developers) who are both annoyed at
the current situation.  Because of this problem, just drop the in-kernel
copy of the ion code now, as it's not used, and is only causing problems
for everyone involved.

Cc: "Arve Hjønnevåg" <arve@android.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Laura Abbott <laura@labbott.name>
Cc: Martijn Coenen <maco@android.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200827123627.538189-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/Makefile                   |   3 +-
 tools/testing/selftests/android/Makefile           |  39 ----
 tools/testing/selftests/android/config             |   5 -
 tools/testing/selftests/android/ion/.gitignore     |   4 -
 tools/testing/selftests/android/ion/Makefile       |  20 --
 tools/testing/selftests/android/ion/README         | 101 --------
 tools/testing/selftests/android/ion/ion.h          | 134 -----------
 tools/testing/selftests/android/ion/ion_test.sh    |  58 -----
 .../testing/selftests/android/ion/ionapp_export.c  | 127 -----------
 .../testing/selftests/android/ion/ionapp_import.c  |  79 -------
 tools/testing/selftests/android/ion/ionmap_test.c  | 136 -----------
 tools/testing/selftests/android/ion/ionutils.c     | 253 ---------------------
 tools/testing/selftests/android/ion/ionutils.h     |  55 -----
 tools/testing/selftests/android/ion/ipcsocket.c    | 227 ------------------
 tools/testing/selftests/android/ion/ipcsocket.h    |  35 ---
 tools/testing/selftests/android/run.sh             |   3 -
 16 files changed, 1 insertion(+), 1278 deletions(-)
 delete mode 100644 tools/testing/selftests/android/Makefile
 delete mode 100644 tools/testing/selftests/android/config
 delete mode 100644 tools/testing/selftests/android/ion/.gitignore
 delete mode 100644 tools/testing/selftests/android/ion/Makefile
 delete mode 100644 tools/testing/selftests/android/ion/README
 delete mode 100644 tools/testing/selftests/android/ion/ion.h
 delete mode 100755 tools/testing/selftests/android/ion/ion_test.sh
 delete mode 100644 tools/testing/selftests/android/ion/ionapp_export.c
 delete mode 100644 tools/testing/selftests/android/ion/ionapp_import.c
 delete mode 100644 tools/testing/selftests/android/ion/ionmap_test.c
 delete mode 100644 tools/testing/selftests/android/ion/ionutils.c
 delete mode 100644 tools/testing/selftests/android/ion/ionutils.h
 delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.c
 delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.h
 delete mode 100755 tools/testing/selftests/android/run.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..924cbcc22e0d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TARGETS = android
-TARGETS += arm64
+TARGETS = arm64
 TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += capabilities
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
deleted file mode 100644
index 9258306cafe9..000000000000
--- a/tools/testing/selftests/android/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-SUBDIRS := ion
-
-TEST_PROGS := run.sh
-
-.PHONY: all clean
-
-include ../lib.mk
-
-all:
-	@for DIR in $(SUBDIRS); do		\
-		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
-		TEST=$$DIR"_test.sh"; \
-		if [ -e $$DIR/$$TEST ]; then \
-			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
-		fi \
-	done
-
-override define INSTALL_RULE
-	mkdir -p $(INSTALL_PATH)
-install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)  $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-
-	@for SUBDIR in $(SUBDIRS); do \
-		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
-	done;
-endef
-
-override define CLEAN
-	@for DIR in $(SUBDIRS); do		\
-		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-	done
-endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
deleted file mode 100644
index b4ad748a9dd9..000000000000
--- a/tools/testing/selftests/android/config
+++ /dev/null
@@ -1,5 +0,0 @@
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
deleted file mode 100644
index 78eae9972bb1..000000000000
--- a/tools/testing/selftests/android/ion/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ionapp_export
-ionapp_import
-ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
deleted file mode 100644
index 42b71f005332..000000000000
--- a/tools/testing/selftests/android/ion/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
-CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
-
-TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
-
-all: $(TEST_GEN_FILES)
-
-$(TEST_GEN_FILES): ipcsocket.c ionutils.c
-
-TEST_PROGS := ion_test.sh
-
-KSFT_KHDR_INSTALL := 1
-top_srcdir = ../../../../..
-include ../../lib.mk
-
-$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
deleted file mode 100644
index 21783e9c451e..000000000000
--- a/tools/testing/selftests/android/ion/README
+++ /dev/null
@@ -1,101 +0,0 @@
-ION BUFFER SHARING UTILITY
-==========================
-File: ion_test.sh : Utility to test ION driver buffer sharing mechanism.
-Author: Pintu Kumar <pintu.ping@gmail.com>
-
-Introduction:
--------------
-This is a test utility to verify ION buffer sharing in user space
-between 2 independent processes.
-It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to
-another process to share the same buffer.
-This utility demonstrates how ION buffer sharing can be implemented between
-two user space processes, using various heap types.
-The following heap types are supported by ION driver.
-ION_HEAP_TYPE_SYSTEM (0)
-ION_HEAP_TYPE_SYSTEM_CONTIG (1)
-ION_HEAP_TYPE_CARVEOUT (2)
-ION_HEAP_TYPE_CHUNK (3)
-ION_HEAP_TYPE_DMA (4)
-
-By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
-Each heap is associated with the respective heap id.
-This utility is designed in the form of client/server program.
-The server part (ionapp_export) is the exporter of the buffer.
-It is responsible for creating an ION client, allocating the buffer based on
-the heap id, writing some data to this buffer and then exporting the FD
-(associated with this buffer) to another process using socket IPC.
-This FD is called as buffer FD (which is different than the ION client FD).
-
-The client part (ionapp_import) is the importer of the buffer.
-It retrives the FD from the socket data and installs into its address space.
-This new FD internally points to the same kernel buffer.
-So first it reads the data that is stored in this buffer and prints it.
-Then it writes the different size of data (it could be different data) to the
-same buffer.
-Finally the buffer FD must be closed by both the exporter and importer.
-Thus the same kernel buffer is shared among two user space processes using
-ION driver and only one time allocation.
-
-Prerequisite:
--------------
-This utility works only if /dev/ion interface is present.
-The following configs needs to be enabled in kernel to include ion driver.
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-
-This utility requires to be run as root user.
-
-
-Compile and test:
------------------
-This utility is made to be run as part of kselftest framework in kernel.
-To compile and run using kselftest you can simply do the following from the
-kernel top directory.
-linux$ make TARGETS=android kselftest
-Or you can also use:
-linux$ make -C tools/testing/selftests TARGETS=android run_tests
-Using the selftest it can directly execute the ion_test.sh script to test the
-buffer sharing using ion system heap.
-Currently the heap size is hard coded as just 10 bytes inside this script.
-You need to be a root user to run under selftest.
-
-You can also compile and test manually using the following steps:
-ion$ make
-These will generate 2 executable: ionapp_export, ionapp_import
-Now you can run the export and import manually by specifying the heap type
-and the heap size.
-You can also directly execute the shell script to run the test automatically.
-Simply use the following command to run the test.
-ion$ sudo ./ion_test.sh
-
-Test Results:
--------------
-The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14.
-Here is the snapshot of the test result using kselftest.
-
-linux# make TARGETS=android kselftest
-heap_type: 0, heap_size: 10
---------------------------------------
-heap type: 0
-  heap id: 1
-heap name: ion_system_heap
---------------------------------------
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-Sharing fd: 6, Client fd: 5
-<ion_close_buffer_fd>: buffer release successfully....
-Received buffer fd: 4
-Read buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
-0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd
-<ion_close_buffer_fd>: buffer release successfully....
-ion_test.sh: heap_type: 0 - [PASS]
-
-ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
deleted file mode 100644
index 33db23018abf..000000000000
--- a/tools/testing/selftests/android/ion/ion.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * ion.h
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-/* This file is copied from drivers/staging/android/uapi/ion.h
- * This local copy is required for the selftest to pass, when build
- * outside the kernel source tree.
- * Please keep this file in sync with its original file until the
- * ion driver is moved outside the staging tree.
- */
-
-#ifndef _UAPI_LINUX_ION_H
-#define _UAPI_LINUX_ION_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-/**
- * enum ion_heap_types - list of all possible types of heaps
- * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
- * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
- * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
- *				 carveout heap, allocations are physically
- *				 contiguous
- * @ION_HEAP_TYPE_DMA:		 memory allocated via DMA API
- * @ION_NUM_HEAPS:		 helper for iterating over heaps, a bit mask
- *				 is used to identify the heaps, so only 32
- *				 total heap types are supported
- */
-enum ion_heap_type {
-	ION_HEAP_TYPE_SYSTEM,
-	ION_HEAP_TYPE_SYSTEM_CONTIG,
-	ION_HEAP_TYPE_CARVEOUT,
-	ION_HEAP_TYPE_CHUNK,
-	ION_HEAP_TYPE_DMA,
-	ION_HEAP_TYPE_CUSTOM, /*
-			       * must be last so device specific heaps always
-			       * are at the end of this enum
-			       */
-};
-
-#define ION_NUM_HEAP_IDS		(sizeof(unsigned int) * 8)
-
-/**
- * allocation flags - the lower 16 bits are used by core ion, the upper 16
- * bits are reserved for use by the heaps themselves.
- */
-
-/*
- * mappings of this buffer should be cached, ion will do cache maintenance
- * when the buffer is mapped for dma
- */
-#define ION_FLAG_CACHED 1
-
-/**
- * DOC: Ion Userspace API
- *
- * create a client by opening /dev/ion
- * most operations handled via following ioctls
- *
- */
-
-/**
- * struct ion_allocation_data - metadata passed from userspace for allocations
- * @len:		size of the allocation
- * @heap_id_mask:	mask of heap ids to allocate from
- * @flags:		flags passed to heap
- * @handle:		pointer that will be populated with a cookie to use to
- *			refer to this allocation
- *
- * Provided by userspace as an argument to the ioctl
- */
-struct ion_allocation_data {
-	__u64 len;
-	__u32 heap_id_mask;
-	__u32 flags;
-	__u32 fd;
-	__u32 unused;
-};
-
-#define MAX_HEAP_NAME			32
-
-/**
- * struct ion_heap_data - data about a heap
- * @name - first 32 characters of the heap name
- * @type - heap type
- * @heap_id - heap id for the heap
- */
-struct ion_heap_data {
-	char name[MAX_HEAP_NAME];
-	__u32 type;
-	__u32 heap_id;
-	__u32 reserved0;
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-/**
- * struct ion_heap_query - collection of data about all heaps
- * @cnt - total number of heaps to be copied
- * @heaps - buffer to copy heap data
- */
-struct ion_heap_query {
-	__u32 cnt; /* Total number of heaps to be copied */
-	__u32 reserved0; /* align to 64bits */
-	__u64 heaps; /* buffer to be populated */
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-#define ION_IOC_MAGIC		'I'
-
-/**
- * DOC: ION_IOC_ALLOC - allocate memory
- *
- * Takes an ion_allocation_data struct and returns it with the handle field
- * populated with the opaque handle for the allocation.
- */
-#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
-				      struct ion_allocation_data)
-
-/**
- * DOC: ION_IOC_HEAP_QUERY - information about available heaps
- *
- * Takes an ion_heap_query structure and populates information about
- * available Ion heaps.
- */
-#define ION_IOC_HEAP_QUERY     _IOWR(ION_IOC_MAGIC, 8, \
-					struct ion_heap_query)
-
-#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
deleted file mode 100755
index 69e676cfc94e..000000000000
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-heapsize=4096
-TCID="ion_test.sh"
-errcode=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-run_test()
-{
-	heaptype=$1
-	./ionapp_export -i $heaptype -s $heapsize &
-	sleep 1
-	./ionapp_import
-	if [ $? -ne 0 ]; then
-		echo "$TCID: heap_type: $heaptype - [FAIL]"
-		errcode=1
-	else
-		echo "$TCID: heap_type: $heaptype - [PASS]"
-	fi
-	sleep 1
-	echo ""
-}
-
-check_root()
-{
-	uid=$(id -u)
-	if [ $uid -ne 0 ]; then
-		echo $TCID: must be run as root >&2
-		exit $ksft_skip
-	fi
-}
-
-check_device()
-{
-	DEVICE=/dev/ion
-	if [ ! -e $DEVICE ]; then
-		echo $TCID: No $DEVICE device found >&2
-		echo $TCID: May be CONFIG_ION is not set >&2
-		exit $ksft_skip
-	fi
-}
-
-main_function()
-{
-	check_device
-	check_root
-
-	# ION_SYSTEM_HEAP TEST
-	run_test 0
-	# ION_SYSTEM_CONTIG_HEAP TEST
-	run_test 1
-}
-
-main_function
-echo "$TCID: done"
-exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
deleted file mode 100644
index 063b7830d1bd..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_export.c
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_export.c
- *
- * It is a user space utility to create and export android
- * ion memory buffer fd to another process using unix domain socket as IPC.
- * This acts like a server for ionapp_import(client).
- * So, this server has to be started first before the client.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/time.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void print_usage(int argc, char *argv[])
-{
-	printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
-		argv[0]);
-}
-
-int main(int argc, char *argv[])
-{
-	int opt, ret, status, heapid;
-	int sockfd, client_fd, shared_fd;
-	unsigned char *map_buf;
-	unsigned long map_len, heap_type, heap_size, flags;
-	struct ion_buffer_info info;
-	struct socket_info skinfo;
-
-	if (argc < 2) {
-		print_usage(argc, argv);
-		return -1;
-	}
-
-	heap_size = 0;
-	flags = 0;
-	heap_type = ION_HEAP_TYPE_SYSTEM;
-
-	while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
-		switch (opt) {
-		case 'h':
-			print_usage(argc, argv);
-			exit(0);
-			break;
-		case 'i':
-			heapid = atoi(optarg);
-			switch (heapid) {
-			case 0:
-				heap_type = ION_HEAP_TYPE_SYSTEM;
-				break;
-			case 1:
-				heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
-				break;
-			default:
-				printf("ERROR: heap type not supported\n");
-				exit(1);
-			}
-			break;
-		case 's':
-			heap_size = atoi(optarg);
-			break;
-		default:
-			print_usage(argc, argv);
-			exit(1);
-			break;
-		}
-	}
-
-	if (heap_size <= 0) {
-		printf("heap_size cannot be 0\n");
-		print_usage(argc, argv);
-		exit(1);
-	}
-
-	printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
-	info.heap_type = heap_type;
-	info.heap_size = heap_size;
-	info.flag_type = flags;
-
-	/* This is server: open the socket connection first */
-	/* Here; 1 indicates server or exporter */
-	status = opensocket(&sockfd, SOCKET_NAME, 1);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
-		goto err_socket;
-	}
-	skinfo.sockfd = sockfd;
-
-	ret = ion_export_buffer_fd(&info);
-	if (ret < 0) {
-		fprintf(stderr, "FAILED: ion_get_buffer_fd\n");
-		goto err_export;
-	}
-	client_fd = info.ionfd;
-	shared_fd = info.buffd;
-	map_buf = info.buffer;
-	map_len = info.buflen;
-	write_buffer(map_buf, map_len);
-
-	/* share ion buf fd with other user process */
-	printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
-	skinfo.datafd = shared_fd;
-	skinfo.buflen = map_len;
-
-	ret = socket_send_fd(&skinfo);
-	if (ret < 0) {
-		fprintf(stderr, "FAILED: socket_send_fd\n");
-		goto err_send;
-	}
-
-err_send:
-err_export:
-	ion_close_buffer_fd(&info);
-
-err_socket:
-	closesocket(sockfd, SOCKET_NAME);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
deleted file mode 100644
index 54b580cb04f6..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_import.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_import.c
- *
- * It is a user space utility to receive android ion memory buffer fd
- * over unix domain socket IPC that can be exported by ionapp_export.
- * This acts like a client for ionapp_export.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-int main(void)
-{
-	int ret, status;
-	int sockfd, shared_fd;
-	unsigned char *map_buf;
-	unsigned long map_len;
-	struct ion_buffer_info info;
-	struct socket_info skinfo;
-
-	/* This is the client part. Here 0 means client or importer */
-	status = opensocket(&sockfd, SOCKET_NAME, 0);
-	if (status < 0) {
-		fprintf(stderr, "No exporter exists...\n");
-		ret = status;
-		goto err_socket;
-	}
-
-	skinfo.sockfd = sockfd;
-
-	ret = socket_receive_fd(&skinfo);
-	if (ret < 0) {
-		fprintf(stderr, "Failed: socket_receive_fd\n");
-		goto err_recv;
-	}
-
-	shared_fd = skinfo.datafd;
-	printf("Received buffer fd: %d\n", shared_fd);
-	if (shared_fd <= 0) {
-		fprintf(stderr, "ERROR: improper buf fd\n");
-		ret = -1;
-		goto err_fd;
-	}
-
-	memset(&info, 0, sizeof(info));
-	info.buffd = shared_fd;
-	info.buflen = ION_BUFFER_LEN;
-
-	ret = ion_import_buffer_fd(&info);
-	if (ret < 0) {
-		fprintf(stderr, "Failed: ion_use_buffer_fd\n");
-		goto err_import;
-	}
-
-	map_buf = info.buffer;
-	map_len = info.buflen;
-	read_buffer(map_buf, map_len);
-
-	/* Write probably new data to the same buffer again */
-	map_len = ION_BUFFER_LEN;
-	write_buffer(map_buf, map_len);
-
-err_import:
-	ion_close_buffer_fd(&info);
-err_fd:
-err_recv:
-err_socket:
-	closesocket(sockfd, SOCKET_NAME);
-
-	return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
deleted file mode 100644
index dab36b06b37d..000000000000
--- a/tools/testing/selftests/android/ion/ionmap_test.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/dma-buf.h>
-
-#include <drm/drm.h>
-
-#include "ion.h"
-#include "ionutils.h"
-
-int check_vgem(int fd)
-{
-	drm_version_t version = { 0 };
-	char name[5];
-	int ret;
-
-	version.name_len = 4;
-	version.name = name;
-
-	ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
-	if (ret)
-		return 1;
-
-	return strcmp(name, "vgem");
-}
-
-int open_vgem(void)
-{
-	int i, fd;
-	const char *drmstr = "/dev/dri/card";
-
-	fd = -1;
-	for (i = 0; i < 16; i++) {
-		char name[80];
-
-		sprintf(name, "%s%u", drmstr, i);
-
-		fd = open(name, O_RDWR);
-		if (fd < 0)
-			continue;
-
-		if (check_vgem(fd)) {
-			close(fd);
-			continue;
-		} else {
-			break;
-		}
-
-	}
-	return fd;
-}
-
-int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
-{
-	struct drm_prime_handle import_handle = { 0 };
-	int ret;
-
-	import_handle.fd = dma_buf_fd;
-	import_handle.flags = 0;
-	import_handle.handle = 0;
-
-	ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
-	if (ret == 0)
-		*handle = import_handle.handle;
-	return ret;
-}
-
-void close_handle(int vgem_fd, uint32_t handle)
-{
-	struct drm_gem_close close = { 0 };
-
-	close.handle = handle;
-	ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
-}
-
-int main()
-{
-	int ret, vgem_fd;
-	struct ion_buffer_info info;
-	uint32_t handle = 0;
-	struct dma_buf_sync sync = { 0 };
-
-	info.heap_type = ION_HEAP_TYPE_SYSTEM;
-	info.heap_size = 4096;
-	info.flag_type = ION_FLAG_CACHED;
-
-	ret = ion_export_buffer_fd(&info);
-	if (ret < 0) {
-		printf("ion buffer alloc failed\n");
-		return -1;
-	}
-
-	vgem_fd = open_vgem();
-	if (vgem_fd < 0) {
-		ret = vgem_fd;
-		printf("Failed to open vgem\n");
-		goto out_ion;
-	}
-
-	ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
-
-	if (ret < 0) {
-		printf("Failed to import buffer\n");
-		goto out_vgem;
-	}
-
-	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
-	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
-	if (ret)
-		printf("sync start failed %d\n", errno);
-
-	memset(info.buffer, 0xff, 4096);
-
-	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
-	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
-	if (ret)
-		printf("sync end failed %d\n", errno);
-
-	close_handle(vgem_fd, handle);
-	ret = 0;
-
-out_vgem:
-	close(vgem_fd);
-out_ion:
-	ion_close_buffer_fd(&info);
-	printf("done.\n");
-	return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
deleted file mode 100644
index 7d1d37c4ef6a..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ /dev/null
@@ -1,253 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-//#include <stdint.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void write_buffer(void *buffer, unsigned long len)
-{
-	int i;
-	unsigned char *ptr = (unsigned char *)buffer;
-
-	if (!ptr) {
-		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
-		return;
-	}
-
-	printf("Fill buffer content:\n");
-	memset(ptr, 0xfd, len);
-	for (i = 0; i < len; i++)
-		printf("0x%x ", ptr[i]);
-	printf("\n");
-}
-
-void read_buffer(void *buffer, unsigned long len)
-{
-	int i;
-	unsigned char *ptr = (unsigned char *)buffer;
-
-	if (!ptr) {
-		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
-		return;
-	}
-
-	printf("Read buffer content:\n");
-	for (i = 0; i < len; i++)
-		printf("0x%x ", ptr[i]);
-	printf("\n");
-}
-
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	int i, ret, ionfd, buffer_fd;
-	unsigned int heap_id;
-	unsigned long maplen;
-	unsigned char *map_buffer;
-	struct ion_allocation_data alloc_data;
-	struct ion_heap_query query;
-	struct ion_heap_data heap_data[MAX_HEAP_COUNT];
-
-	if (!ion_info) {
-		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
-		return -1;
-	}
-
-	/* Create an ION client */
-	ionfd = open(ION_DEVICE, O_RDWR);
-	if (ionfd < 0) {
-		fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
-			__func__, strerror(errno));
-		return -1;
-	}
-
-	memset(&query, 0, sizeof(query));
-	query.cnt = MAX_HEAP_COUNT;
-	query.heaps = (unsigned long int)&heap_data[0];
-	/* Query ION heap_id_mask from ION heap */
-	ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
-			__func__, strerror(errno));
-		goto err_query;
-	}
-
-	heap_id = MAX_HEAP_COUNT + 1;
-	for (i = 0; i < query.cnt; i++) {
-		if (heap_data[i].type == ion_info->heap_type) {
-			heap_id = heap_data[i].heap_id;
-			break;
-		}
-	}
-
-	if (heap_id > MAX_HEAP_COUNT) {
-		fprintf(stderr, "<%s>: ERROR: heap type does not exists\n",
-			__func__);
-		goto err_heap;
-	}
-
-	alloc_data.len = ion_info->heap_size;
-	alloc_data.heap_id_mask = 1 << heap_id;
-	alloc_data.flags = ion_info->flag_type;
-
-	/* Allocate memory for this ION client as per heap_type */
-	ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
-			__func__, strerror(errno));
-		goto err_alloc;
-	}
-
-	/* This will return a valid buffer fd */
-	buffer_fd = alloc_data.fd;
-	maplen = alloc_data.len;
-
-	if (buffer_fd < 0 || maplen <= 0) {
-		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
-			__func__, buffer_fd, maplen);
-		goto err_fd_data;
-	}
-
-	/* Create memory mapped buffer for the buffer fd */
-	map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
-			MAP_SHARED, buffer_fd, 0);
-	if (map_buffer == MAP_FAILED) {
-		fprintf(stderr, "<%s>: Failed: mmap: %s\n",
-			__func__, strerror(errno));
-		goto err_mmap;
-	}
-
-	ion_info->ionfd = ionfd;
-	ion_info->buffd = buffer_fd;
-	ion_info->buffer = map_buffer;
-	ion_info->buflen = maplen;
-
-	return 0;
-
-	munmap(map_buffer, maplen);
-
-err_fd_data:
-err_mmap:
-	/* in case of error: close the buffer fd */
-	if (buffer_fd)
-		close(buffer_fd);
-
-err_query:
-err_heap:
-err_alloc:
-	/* In case of error: close the ion client fd */
-	if (ionfd)
-		close(ionfd);
-
-	return -1;
-}
-
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	int buffd;
-	unsigned char *map_buf;
-	unsigned long map_len;
-
-	if (!ion_info) {
-		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
-		return -1;
-	}
-
-	map_len = ion_info->buflen;
-	buffd = ion_info->buffd;
-
-	if (buffd < 0 || map_len <= 0) {
-		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
-			__func__, buffd, map_len);
-		goto err_buffd;
-	}
-
-	map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
-			MAP_SHARED, buffd, 0);
-	if (map_buf == MAP_FAILED) {
-		printf("<%s>: Failed - mmap: %s\n",
-			__func__, strerror(errno));
-		goto err_mmap;
-	}
-
-	ion_info->buffer = map_buf;
-	ion_info->buflen = map_len;
-
-	return 0;
-
-err_mmap:
-	if (buffd)
-		close(buffd);
-
-err_buffd:
-	return -1;
-}
-
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	if (ion_info) {
-		/* unmap the buffer properly in the end */
-		munmap(ion_info->buffer, ion_info->buflen);
-		/* close the buffer fd */
-		if (ion_info->buffd > 0)
-			close(ion_info->buffd);
-		/* Finally, close the client fd */
-		if (ion_info->ionfd > 0)
-			close(ion_info->ionfd);
-	}
-}
-
-int socket_send_fd(struct socket_info *info)
-{
-	int status;
-	int fd, sockfd;
-	struct socketdata skdata;
-
-	if (!info) {
-		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
-		return -1;
-	}
-
-	sockfd = info->sockfd;
-	fd = info->datafd;
-	memset(&skdata, 0, sizeof(skdata));
-	skdata.data = fd;
-	skdata.len = sizeof(skdata.data);
-	status = sendtosocket(sockfd, &skdata);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
-		return -1;
-	}
-
-	return 0;
-}
-
-int socket_receive_fd(struct socket_info *info)
-{
-	int status;
-	int fd, sockfd;
-	struct socketdata skdata;
-
-	if (!info) {
-		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
-		return -1;
-	}
-
-	sockfd = info->sockfd;
-	memset(&skdata, 0, sizeof(skdata));
-	status = receivefromsocket(sockfd, &skdata);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
-		return -1;
-	}
-
-	fd = (int)skdata.data;
-	info->datafd = fd;
-
-	return status;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
deleted file mode 100644
index 9941eb858576..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef __ION_UTILS_H
-#define __ION_UTILS_H
-
-#include "ion.h"
-
-#define SOCKET_NAME "ion_socket"
-#define ION_DEVICE "/dev/ion"
-
-#define ION_BUFFER_LEN	4096
-#define MAX_HEAP_COUNT	ION_HEAP_TYPE_CUSTOM
-
-struct socket_info {
-	int sockfd;
-	int datafd;
-	unsigned long buflen;
-};
-
-struct ion_buffer_info {
-	int ionfd;
-	int buffd;
-	unsigned int heap_type;
-	unsigned int flag_type;
-	unsigned long heap_size;
-	unsigned long buflen;
-	unsigned char *buffer;
-};
-
-
-/* This is used to fill the data into the mapped buffer */
-void write_buffer(void *buffer, unsigned long len);
-
-/* This is used to read the data from the exported buffer */
-void read_buffer(void *buffer, unsigned long len);
-
-/* This is used to create an ION buffer FD for the kernel buffer
- * So you can export this same buffer to others in the form of FD
- */
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to import or map an exported FD.
- * So we point to same buffer without making a copy. Hence zero-copy.
- */
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to close all references for the ION client */
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to send FD to another process using socket IPC */
-int socket_send_fd(struct socket_info *skinfo);
-
-/* This is used to receive FD from another process using socket IPC */
-int socket_receive_fd(struct socket_info *skinfo);
-
-
-#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
deleted file mode 100644
index 7dc521002095..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.c
+++ /dev/null
@@ -1,227 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include <errno.h>
-
-#include "ipcsocket.h"
-
-
-int opensocket(int *sockfd, const char *name, int connecttype)
-{
-	int ret, temp = 1;
-
-	if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
-		fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
-		return -1;
-	}
-
-	ret = socket(PF_LOCAL, SOCK_STREAM, 0);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed socket: <%s>\n",
-			__func__, strerror(errno));
-		return ret;
-	}
-
-	*sockfd = ret;
-	if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
-		(char *)&temp, sizeof(int)) < 0) {
-		fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
-		__func__, strerror(errno));
-		goto err;
-	}
-
-	sprintf(sock_name, "/tmp/%s", name);
-
-	if (connecttype == 1) {
-		/* This is for Server connection */
-		struct sockaddr_un skaddr;
-		int clientfd;
-		socklen_t sklen;
-
-		unlink(sock_name);
-		memset(&skaddr, 0, sizeof(skaddr));
-		skaddr.sun_family = AF_LOCAL;
-		strcpy(skaddr.sun_path, sock_name);
-
-		ret = bind(*sockfd, (struct sockaddr *)&skaddr,
-			SUN_LEN(&skaddr));
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed bind: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		ret = listen(*sockfd, 5);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed listen: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		memset(&skaddr, 0, sizeof(skaddr));
-		sklen = sizeof(skaddr);
-
-		ret = accept(*sockfd, (struct sockaddr *)&skaddr,
-			(socklen_t *)&sklen);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed accept: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		clientfd = ret;
-		*sockfd = clientfd;
-	} else {
-		/* This is for client connection */
-		struct sockaddr_un skaddr;
-
-		memset(&skaddr, 0, sizeof(skaddr));
-		skaddr.sun_family = AF_LOCAL;
-		strcpy(skaddr.sun_path, sock_name);
-
-		ret = connect(*sockfd, (struct sockaddr *)&skaddr,
-			SUN_LEN(&skaddr));
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed connect: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	if (*sockfd)
-		close(*sockfd);
-
-	return ret;
-}
-
-int sendtosocket(int sockfd, struct socketdata *skdata)
-{
-	int ret, buffd;
-	unsigned int len;
-	char cmsg_b[CMSG_SPACE(sizeof(int))];
-	struct cmsghdr *cmsg;
-	struct msghdr msgh;
-	struct iovec iov;
-	struct timeval timeout;
-	fd_set selFDs;
-
-	if (!skdata) {
-		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
-		return -1;
-	}
-
-	FD_ZERO(&selFDs);
-	FD_SET(0, &selFDs);
-	FD_SET(sockfd, &selFDs);
-	timeout.tv_sec = 20;
-	timeout.tv_usec = 0;
-
-	ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed select: <%s>\n",
-		__func__, strerror(errno));
-		return -1;
-	}
-
-	if (FD_ISSET(sockfd, &selFDs)) {
-		buffd = skdata->data;
-		len = skdata->len;
-		memset(&msgh, 0, sizeof(msgh));
-		msgh.msg_control = &cmsg_b;
-		msgh.msg_controllen = CMSG_LEN(len);
-		iov.iov_base = "OK";
-		iov.iov_len = 2;
-		msgh.msg_iov = &iov;
-		msgh.msg_iovlen = 1;
-		cmsg = CMSG_FIRSTHDR(&msgh);
-		cmsg->cmsg_level = SOL_SOCKET;
-		cmsg->cmsg_type = SCM_RIGHTS;
-		cmsg->cmsg_len = CMSG_LEN(len);
-		memcpy(CMSG_DATA(cmsg), &buffd, len);
-
-		ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
-			__func__, strerror(errno));
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-int receivefromsocket(int sockfd, struct socketdata *skdata)
-{
-	int ret, buffd;
-	unsigned int len = 0;
-	char cmsg_b[CMSG_SPACE(sizeof(int))];
-	struct cmsghdr *cmsg;
-	struct msghdr msgh;
-	struct iovec iov;
-	fd_set recvFDs;
-	char data[32];
-
-	if (!skdata) {
-		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
-		return -1;
-	}
-
-	FD_ZERO(&recvFDs);
-	FD_SET(0, &recvFDs);
-	FD_SET(sockfd, &recvFDs);
-
-	ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed select: <%s>\n",
-		__func__, strerror(errno));
-		return -1;
-	}
-
-	if (FD_ISSET(sockfd, &recvFDs)) {
-		len = sizeof(buffd);
-		memset(&msgh, 0, sizeof(msgh));
-		msgh.msg_control = &cmsg_b;
-		msgh.msg_controllen = CMSG_LEN(len);
-		iov.iov_base = data;
-		iov.iov_len = sizeof(data)-1;
-		msgh.msg_iov = &iov;
-		msgh.msg_iovlen = 1;
-		cmsg = CMSG_FIRSTHDR(&msgh);
-		cmsg->cmsg_level = SOL_SOCKET;
-		cmsg->cmsg_type = SCM_RIGHTS;
-		cmsg->cmsg_len = CMSG_LEN(len);
-
-		ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
-			__func__, strerror(errno));
-			return -1;
-		}
-
-		memcpy(&buffd, CMSG_DATA(cmsg), len);
-		skdata->data = buffd;
-		skdata->len = len;
-	}
-	return 0;
-}
-
-int closesocket(int sockfd, char *name)
-{
-	char sockname[MAX_SOCK_NAME_LEN];
-
-	if (sockfd)
-		close(sockfd);
-	sprintf(sockname, "/tmp/%s", name);
-	unlink(sockname);
-	shutdown(sockfd, 2);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
deleted file mode 100644
index b3e84498a8a1..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#ifndef _IPCSOCKET_H
-#define _IPCSOCKET_H
-
-
-#define MAX_SOCK_NAME_LEN	64
-
-char sock_name[MAX_SOCK_NAME_LEN];
-
-/* This structure is responsible for holding the IPC data
- * data: hold the buffer fd
- * len: just the length of 32-bit integer fd
- */
-struct socketdata {
-	int data;
-	unsigned int len;
-};
-
-/* This API is used to open the IPC socket connection
- * name: implies a unique socket name in the system
- * connecttype: implies server(0) or client(1)
- */
-int opensocket(int *sockfd, const char *name, int connecttype);
-
-/* This is the API to send socket data over IPC socket */
-int sendtosocket(int sockfd, struct socketdata *data);
-
-/* This is the API to receive socket data over IPC socket */
-int receivefromsocket(int sockfd, struct socketdata *data);
-
-/* This is the API to close the socket connection */
-int closesocket(int sockfd, char *name);
-
-
-#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
deleted file mode 100755
index dd8edf291454..000000000000
--- a/tools/testing/selftests/android/run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-
-(cd ion; ./ion_test.sh)
-- 
cgit v1.2.3


From 3adb776384f2042ef6bda876e91a7a7ac2872c5e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 19 Oct 2020 21:08:03 -0700
Subject: x86, libnvdimm/test: Remove COPY_MC_TEST

The COPY_MC_TEST facility has served its purpose for validating the
early termination conditions of the copy_mc_fragile() implementation.
Remove it and the EXPORT_SYMBOL_GPL of copy_mc_fragile().

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/160316688322.3374697.8648308115165836243.stgit@dwillia2-desk3.amr.corp.intel.com
---
 tools/testing/nvdimm/test/nfit.c | 103 ---------------------------------------
 1 file changed, 103 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 2ac0fff6dad8..9b185bf82da8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,6 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/copy_mc_test.h>
 #include <asm/mce.h>
 
 /*
@@ -3284,107 +3283,6 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-
-enum INJECT {
-	INJECT_NONE,
-	INJECT_SRC,
-	INJECT_DST,
-};
-
-static void copy_mc_test_init(char *dst, char *src, size_t size)
-{
-	size_t i;
-
-	memset(dst, 0xff, size);
-	for (i = 0; i < size; i++)
-		src[i] = (char) i;
-}
-
-static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
-		size_t size, unsigned long rem)
-{
-	size_t i;
-
-	for (i = 0; i < size - rem; i++)
-		if (dst[i] != (unsigned char) i) {
-			pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
-					__func__, __LINE__, i, dst[i],
-					(unsigned char) i);
-			return false;
-		}
-	for (i = size - rem; i < size; i++)
-		if (dst[i] != 0xffU) {
-			pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
-					__func__, __LINE__, i, dst[i]);
-			return false;
-		}
-	return true;
-}
-
-void copy_mc_test(void)
-{
-	char *inject_desc[] = { "none", "source", "destination" };
-	enum INJECT inj;
-
-	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
-		pr_info("%s: run...\n", __func__);
-	} else {
-		pr_info("%s: disabled, skip.\n", __func__);
-		return;
-	}
-
-	for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
-		int i;
-
-		pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
-		for (i = 0; i < 512; i++) {
-			unsigned long expect, rem;
-			void *src, *dst;
-			bool valid;
-
-			switch (inj) {
-			case INJECT_NONE:
-				copy_mc_inject_src(NULL);
-				copy_mc_inject_dst(NULL);
-				dst = &copy_mc_buf[2048];
-				src = &copy_mc_buf[1024 - i];
-				expect = 0;
-				break;
-			case INJECT_SRC:
-				copy_mc_inject_src(&copy_mc_buf[1024]);
-				copy_mc_inject_dst(NULL);
-				dst = &copy_mc_buf[2048];
-				src = &copy_mc_buf[1024 - i];
-				expect = 512 - i;
-				break;
-			case INJECT_DST:
-				copy_mc_inject_src(NULL);
-				copy_mc_inject_dst(&copy_mc_buf[2048]);
-				dst = &copy_mc_buf[2048 - i];
-				src = &copy_mc_buf[1024];
-				expect = 512 - i;
-				break;
-			}
-
-			copy_mc_test_init(dst, src, 512);
-			rem = copy_mc_fragile(dst, src, 512);
-			valid = copy_mc_test_validate(dst, src, 512, expect);
-			if (rem == expect && valid)
-				continue;
-			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
-					__func__,
-					((unsigned long) dst) & ~PAGE_MASK,
-					((unsigned long ) src) & ~PAGE_MASK,
-					512, i, rem, valid ? "valid" : "bad",
-					expect);
-		}
-	}
-
-	copy_mc_inject_src(NULL);
-	copy_mc_inject_dst(NULL);
-}
-
 static __init int nfit_test_init(void)
 {
 	int rc, i;
@@ -3393,7 +3291,6 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
-- 
cgit v1.2.3


From 748f0d70087c56226bf1df1f91a00b7ab4c8f883 Mon Sep 17 00:00:00 2001
From: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
Date: Fri, 23 Oct 2020 20:55:27 +0530
Subject: cpupower: Provide online and offline CPU information

When a user tries to modify cpuidle or cpufreq properties on offline
CPUs, the tool returns success (exit status 0) but also does not provide
any warning message regarding offline cpus that may have been specified
but left unchanged. In case of all or a few CPUs being offline, it can be
difficult to keep track of which CPUs didn't get the new frequency or idle
state set. Silent failures are difficult to keep track of when there are a
huge number of CPUs on which the action is performed.

This patch adds helper functions to find both online and offline CPUs and
print them out accordingly.

We use these helper functions in cpuidle-set and cpufreq-set to print an
additional message if the user attempts to modify offline cpus.

Reported-by: Pavithra R. Prakash <pavrampu@in.ibm.com>
Signed-off-by: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpufreq-set.c     |  3 ++
 tools/power/cpupower/utils/cpuidle-set.c     |  4 ++
 tools/power/cpupower/utils/cpupower.c        |  8 ++++
 tools/power/cpupower/utils/helpers/helpers.h | 12 +++++
 tools/power/cpupower/utils/helpers/misc.c    | 66 +++++++++++++++++++++++++++-
 5 files changed, 92 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 7b2164e07057..c5e60a39cfa6 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -315,6 +315,7 @@ int cmd_freq_set(int argc, char **argv)
 		}
 	}
 
+	get_cpustate();
 
 	/* loop over CPUs */
 	for (cpu = bitmask_first(cpus_chosen);
@@ -332,5 +333,7 @@ int cmd_freq_set(int argc, char **argv)
 		}
 	}
 
+	print_offline_cpus();
+
 	return 0;
 }
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index 569f268f4c7f..46158928f9ad 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -95,6 +95,8 @@ int cmd_idle_set(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	get_cpustate();
+
 	/* Default is: set all CPUs */
 	if (bitmask_isallclear(cpus_chosen))
 		bitmask_setall(cpus_chosen);
@@ -181,5 +183,7 @@ int cmd_idle_set(int argc, char **argv)
 			break;
 		}
 	}
+
+	print_offline_cpus();
 	return EXIT_SUCCESS;
 }
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
index 8e3d08042825..8ac3304a9957 100644
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -34,6 +34,8 @@ int run_as_root;
 int base_cpu;
 /* Affected cpus chosen by -c/--cpu param */
 struct bitmask *cpus_chosen;
+struct bitmask *online_cpus;
+struct bitmask *offline_cpus;
 
 #ifdef DEBUG
 int be_verbose;
@@ -178,6 +180,8 @@ int main(int argc, const char *argv[])
 	char pathname[32];
 
 	cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
+	online_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
+	offline_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
 
 	argc--;
 	argv += 1;
@@ -230,6 +234,10 @@ int main(int argc, const char *argv[])
 		ret = p->main(argc, argv);
 		if (cpus_chosen)
 			bitmask_free(cpus_chosen);
+		if (online_cpus)
+			bitmask_free(online_cpus);
+		if (offline_cpus)
+			bitmask_free(offline_cpus);
 		return ret;
 	}
 	print_help();
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index c258eeccd05f..d5799aa71e1f 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -94,6 +94,8 @@ struct cpupower_cpu_info {
  */
 extern int get_cpu_info(struct cpupower_cpu_info *cpu_info);
 extern struct cpupower_cpu_info cpupower_cpu_info;
+
+
 /* cpuid and cpuinfo helpers  **************************/
 
 /* X86 ONLY ****************************************/
@@ -171,4 +173,14 @@ static inline unsigned int cpuid_ecx(unsigned int op) { return 0; };
 static inline unsigned int cpuid_edx(unsigned int op) { return 0; };
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
+/*
+ * CPU State related functions
+ */
+extern struct bitmask *online_cpus;
+extern struct bitmask *offline_cpus;
+
+void get_cpustate(void);
+void print_online_cpus(void);
+void print_offline_cpus(void);
+
 #endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index f406adc40bad..2ead98169cf5 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -1,8 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdlib.h>
 
 #include "helpers/helpers.h"
 
+#if defined(__i386__) || defined(__x86_64__)
+
 #define MSR_AMD_HWCR	0xc0010015
 
 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
@@ -41,3 +45,63 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 	return 0;
 }
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
+
+/* get_cpustate
+ *
+ * Gather the information of all online CPUs into bitmask struct
+ */
+void get_cpustate(void)
+{
+	unsigned int cpu = 0;
+
+	bitmask_clearall(online_cpus);
+	bitmask_clearall(offline_cpus);
+
+	for (cpu = bitmask_first(cpus_chosen);
+		cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+		if (cpupower_is_cpu_online(cpu) == 1)
+			bitmask_setbit(online_cpus, cpu);
+		else
+			bitmask_setbit(offline_cpus, cpu);
+
+		continue;
+	}
+}
+
+/* print_online_cpus
+ *
+ * Print the CPU numbers of all CPUs that are online currently
+ */
+void print_online_cpus(void)
+{
+	int str_len = 0;
+	char *online_cpus_str = NULL;
+
+	str_len = online_cpus->size * 5;
+	online_cpus_str = (void *)malloc(sizeof(char) * str_len);
+
+	if (!bitmask_isallclear(online_cpus)) {
+		bitmask_displaylist(online_cpus_str, str_len, online_cpus);
+		printf(_("Following CPUs are online:\n%s\n"), online_cpus_str);
+	}
+}
+
+/* print_offline_cpus
+ *
+ * Print the CPU numbers of all CPUs that are offline currently
+ */
+void print_offline_cpus(void)
+{
+	int str_len = 0;
+	char *offline_cpus_str = NULL;
+
+	str_len = offline_cpus->size * 5;
+	offline_cpus_str = (void *)malloc(sizeof(char) * str_len);
+
+	if (!bitmask_isallclear(offline_cpus)) {
+		bitmask_displaylist(offline_cpus_str, str_len, offline_cpus);
+		printf(_("Following CPUs are offline:\n%s\n"), offline_cpus_str);
+		printf(_("cpupower set operation was not performed on them\n"));
+	}
+}
-- 
cgit v1.2.3


From 9270e1a744f8ed953009b0e94b26ed0912d9ec1c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 3 Oct 2020 21:40:22 -0400
Subject: tools: memory-model: Document that the LKMM can easily miss control
 dependencies

Add a small section to the litmus-tests.txt documentation file for
the Linux Kernel Memory Model explaining that the memory model often
fails to recognize certain control dependencies.

Suggested-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/litmus-tests.txt | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
index 2f840dcd15cf..8a9d5d2787f9 100644
--- a/tools/memory-model/Documentation/litmus-tests.txt
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -946,6 +946,23 @@ Limitations of the Linux-kernel memory model (LKMM) include:
 	carrying a dependency, then the compiler can break that dependency
 	by substituting a constant of that value.
 
+	Conversely, LKMM sometimes doesn't recognize that a particular
+	optimization is not allowed, and as a result, thinks that a
+	dependency is not present (because the optimization would break it).
+	The memory model misses some pretty obvious control dependencies
+	because of this limitation.  A simple example is:
+
+		r1 = READ_ONCE(x);
+		if (r1 == 0)
+			smp_mb();
+		WRITE_ONCE(y, 1);
+
+	There is a control dependency from the READ_ONCE to the WRITE_ONCE,
+	even when r1 is nonzero, but LKMM doesn't realize this and thinks
+	that the write may execute before the read if r1 != 0.  (Yes, that
+	doesn't make sense if you think about it, but the memory model's
+	intelligence is limited.)
+
 2.	Multiple access sizes for a single variable are not supported,
 	and neither are misaligned or partially overlapping accesses.
 
-- 
cgit v1.2.3


From ab8bcad67bee82e4be290b32f0faaf582d7c3edc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 4 Aug 2020 14:00:17 -0700
Subject: tools/memory-model: Move Documentation description to
 Documentation/README

This commit moves the descriptions of the files residing in
tools/memory-model/Documentation to a README file in that directory,
leaving behind the description of tools/memory-model/Documentation/README
itself.  After this change, tools/memory-model/Documentation/README
provides a guide to the files in the tools/memory-model/Documentation
directory, guiding people with different skills and needs to the most
appropriate starting point.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/README | 59 +++++++++++++++++++++++++++++++++
 tools/memory-model/README               | 22 ++----------
 2 files changed, 61 insertions(+), 20 deletions(-)
 create mode 100644 tools/memory-model/Documentation/README

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
new file mode 100644
index 000000000000..2d9539f19912
--- /dev/null
+++ b/tools/memory-model/Documentation/README
@@ -0,0 +1,59 @@
+It has been said that successful communication requires first identifying
+what your audience knows and then building a bridge from their current
+knowledge to what they need to know.  Unfortunately, the expected
+Linux-kernel memory model (LKMM) audience might be anywhere from novice
+to expert both in kernel hacking and in understanding LKMM.
+
+This document therefore points out a number of places to start reading,
+depending on what you know and what you would like to learn.  Please note
+that the documents later in this list assume that the reader understands
+the material provided by documents earlier in this list.
+
+o	You are new to Linux-kernel concurrency: simple.txt
+
+o	You are familiar with the Linux-kernel concurrency primitives
+	that you need, and just want to get started with LKMM litmus
+	tests:  litmus-tests.txt
+
+o	You are familiar with Linux-kernel concurrency, and would
+	like a detailed intuitive understanding of LKMM, including
+	situations involving more than two threads:  recipes.txt
+
+o	You are familiar with Linux-kernel concurrency and the use of
+	LKMM, and would like a quick reference:  cheatsheet.txt
+
+o	You are familiar with Linux-kernel concurrency and the use
+	of LKMM, and would like to learn about LKMM's requirements,
+	rationale, and implementation:	explanation.txt
+
+o	You are interested in the publications related to LKMM, including
+	hardware manuals, academic literature, standards-committee
+	working papers, and LWN articles:  references.txt
+
+
+====================
+DESCRIPTION OF FILES
+====================
+
+README
+	This file.
+
+cheatsheet.txt
+	Quick-reference guide to the Linux-kernel memory model.
+
+explanation.txt
+	Detailed description of the memory model.
+
+litmus-tests.txt
+	The format, features, capabilities, and limitations of the litmus
+	tests that LKMM can evaluate.
+
+recipes.txt
+	Common memory-ordering patterns.
+
+references.txt
+	Background information.
+
+simple.txt
+	Starting point for someone new to Linux-kernel concurrency.
+	And also a reminder of the simpler approaches to concurrency!
diff --git a/tools/memory-model/README b/tools/memory-model/README
index c8144d4aafa0..39d08d1f0443 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -161,26 +161,8 @@ running LKMM litmus tests.
 DESCRIPTION OF FILES
 ====================
 
-Documentation/cheatsheet.txt
-	Quick-reference guide to the Linux-kernel memory model.
-
-Documentation/explanation.txt
-	Describes the memory model in detail.
-
-Documentation/litmus-tests.txt
-	Describes the format, features, capabilities, and limitations
-	of the litmus tests that LKMM can evaluate.
-
-Documentation/recipes.txt
-	Lists common memory-ordering patterns.
-
-Documentation/references.txt
-	Provides background reading.
-
-Documentation/simple.txt
-	Starting point for someone new to Linux-kernel concurrency.
-	And also for those needing a reminder of the simpler approaches
-	to concurrency!
+Documentation/README
+	Guide to the other documents in the Documentation/ directory.
 
 linux-kernel.bell
 	Categorizes the relevant instructions, including memory
-- 
cgit v1.2.3


From 40723419f4079d0c7de98d0f3149db915557b55a Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:41 +0000
Subject: kselftest: Enable vDSO test on non x86 platforms

Currently the vDSO tests are built only on x86 platforms and cannot be
cross compiled.

Enable vDSO TARGET for all the platforms.

Future patches will extend the tests.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile      | 1 +
 tools/testing/selftests/vDSO/Makefile | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..2dd60070d775 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -65,6 +65,7 @@ endif
 TARGETS += tmpfs
 TARGETS += tpm2
 TARGETS += user
+TARGETS += vDSO
 TARGETS += vm
 TARGETS += x86
 TARGETS += zram
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 0069f2f83f86..47031f3c5f3a 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -9,7 +9,6 @@ ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
 
-ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
 ifeq ($(CONFIG_X86_32),y)
@@ -24,4 +23,3 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 		vdso_standalone_test_x86.c parse_vdso.c \
 		-o $@
 
-endif
-- 
cgit v1.2.3


From 693f5ca08ca0767b407b7ca634dbf1b783676ec3 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:42 +0000
Subject: kselftest: Extend vDSO selftest

The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to the other library functions:
 - time
 - clock_getres
 - clock_gettime

The extension has been used to verify the unified vdso library on the
supported architectures.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile        |   2 +
 tools/testing/selftests/vDSO/vdso_config.h   |  90 ++++++++++
 tools/testing/selftests/vDSO/vdso_test_abi.c | 244 +++++++++++++++++++++++++++
 3 files changed, 336 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_config.h
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_abi.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 47031f3c5f3a..2ac7448bd414 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -5,6 +5,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -18,6 +19,7 @@ endif
 all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
+$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
new file mode 100644
index 000000000000..eeb725df6045
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * vdso_config.h: Configuration options for vDSO tests.
+ * Copyright (c) 2019 Arm Ltd.
+ */
+#ifndef __VDSO_CONFIG_H__
+#define __VDSO_CONFIG_H__
+
+/*
+ * Each architecture exports its vDSO implementation with different names
+ * and a different version from the others, so we need to handle it as a
+ * special case.
+ */
+#if defined(__arm__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__aarch64__)
+#define VDSO_VERSION		3
+#define VDSO_NAMES		0
+#elif defined(__powerpc__)
+#define VDSO_VERSION		1
+#define VDSO_NAMES		0
+#define VDSO_32BIT		1
+#elif defined(__powerpc64__)
+#define VDSO_VERSION		1
+#define VDSO_NAMES		0
+#elif defined (__s390__)
+#define VDSO_VERSION		2
+#define VDSO_NAMES		0
+#define VDSO_32BIT		1
+#elif defined (__s390X__)
+#define VDSO_VERSION		2
+#define VDSO_NAMES		0
+#elif defined(__mips__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__sparc__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__i386__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__x86_64__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#elif defined(__riscv__)
+#define VDSO_VERSION		5
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#else /* nds32 */
+#define VDSO_VERSION		4
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#endif
+
+static const char *versions[6] = {
+	"LINUX_2.6",
+	"LINUX_2.6.15",
+	"LINUX_2.6.29",
+	"LINUX_2.6.39",
+	"LINUX_4",
+	"LINUX_4.15",
+};
+
+static const char *names[2][5] = {
+	{
+		"__kernel_gettimeofday",
+		"__kernel_clock_gettime",
+		"__kernel_time",
+		"__kernel_clock_getres",
+#if defined(VDSO_32BIT)
+		"__kernel_clock_gettime64",
+#endif
+	},
+	{
+		"__vdso_gettimeofday",
+		"__vdso_clock_gettime",
+		"__vdso_time",
+		"__vdso_clock_getres",
+#if defined(VDSO_32BIT)
+		"__vdso_clock_gettime64",
+#endif
+	},
+};
+
+#endif /* __VDSO_CONFIG_H__ */
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
new file mode 100644
index 000000000000..3d603f1394af
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vdso_full_test.c: Sample code to test all the timers.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_full_test.c parse_vdso.c
+ *
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+#include "vdso_config.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+static const char *version;
+static const char **name;
+
+typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
+typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+typedef time_t (*vdso_time_t)(time_t *t);
+
+static int vdso_test_gettimeofday(void)
+{
+	/* Find gettimeofday. */
+	vdso_gettimeofday_t vdso_gettimeofday =
+		(vdso_gettimeofday_t)vdso_sym(version, name[0]);
+
+	if (!vdso_gettimeofday) {
+		printf("Could not find %s\n", name[0]);
+		return KSFT_SKIP;
+	}
+
+	struct timeval tv;
+	long ret = vdso_gettimeofday(&tv, 0);
+
+	if (ret == 0) {
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
+	} else {
+		printf("%s failed\n", name[0]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_clock_gettime(clockid_t clk_id)
+{
+	/* Find clock_gettime. */
+	vdso_clock_gettime_t vdso_clock_gettime =
+		(vdso_clock_gettime_t)vdso_sym(version, name[1]);
+
+	if (!vdso_clock_gettime) {
+		printf("Could not find %s\n", name[1]);
+		return KSFT_SKIP;
+	}
+
+	struct timespec ts;
+	long ret = vdso_clock_gettime(clk_id, &ts);
+
+	if (ret == 0) {
+		printf("The time is %lld.%06lld\n",
+		       (long long)ts.tv_sec, (long long)ts.tv_nsec);
+	} else {
+		printf("%s failed\n", name[1]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_time(void)
+{
+	/* Find time. */
+	vdso_time_t vdso_time =
+		(vdso_time_t)vdso_sym(version, name[2]);
+
+	if (!vdso_time) {
+		printf("Could not find %s\n", name[2]);
+		return KSFT_SKIP;
+	}
+
+	long ret = vdso_time(NULL);
+
+	if (ret > 0) {
+		printf("The time in hours since January 1, 1970 is %lld\n",
+				(long long)(ret / 3600));
+	} else {
+		printf("%s failed\n", name[2]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_clock_getres(clockid_t clk_id)
+{
+	/* Find clock_getres. */
+	vdso_clock_getres_t vdso_clock_getres =
+		(vdso_clock_getres_t)vdso_sym(version, name[3]);
+
+	if (!vdso_clock_getres) {
+		printf("Could not find %s\n", name[3]);
+		return KSFT_SKIP;
+	}
+
+	struct timespec ts, sys_ts;
+	long ret = vdso_clock_getres(clk_id, &ts);
+
+	if (ret == 0) {
+		printf("The resolution is %lld %lld\n",
+		       (long long)ts.tv_sec, (long long)ts.tv_nsec);
+	} else {
+		printf("%s failed\n", name[3]);
+		return KSFT_FAIL;
+	}
+
+	ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+
+	if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) {
+		printf("%s failed\n", name[3]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+const char *vdso_clock_name[12] = {
+	"CLOCK_REALTIME",
+	"CLOCK_MONOTONIC",
+	"CLOCK_PROCESS_CPUTIME_ID",
+	"CLOCK_THREAD_CPUTIME_ID",
+	"CLOCK_MONOTONIC_RAW",
+	"CLOCK_REALTIME_COARSE",
+	"CLOCK_MONOTONIC_COARSE",
+	"CLOCK_BOOTTIME",
+	"CLOCK_REALTIME_ALARM",
+	"CLOCK_BOOTTIME_ALARM",
+	"CLOCK_SGI_CYCLE",
+	"CLOCK_TAI",
+};
+
+/*
+ * This function calls vdso_test_clock_gettime and vdso_test_clock_getres
+ * with different values for clock_id.
+ */
+static inline int vdso_test_clock(clockid_t clock_id)
+{
+	int ret0, ret1;
+
+	ret0 = vdso_test_clock_gettime(clock_id);
+	/* A skipped test is considered passed */
+	if (ret0 == KSFT_SKIP)
+		ret0 = KSFT_PASS;
+
+	ret1 = vdso_test_clock_getres(clock_id);
+	/* A skipped test is considered passed */
+	if (ret1 == KSFT_SKIP)
+		ret1 = KSFT_PASS;
+
+	ret0 += ret1;
+
+	printf("clock_id: %s", vdso_clock_name[clock_id]);
+
+	if (ret0 > 0)
+		printf(" [FAIL]\n");
+	else
+		printf(" [PASS]\n");
+
+	return ret0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	int ret;
+
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return KSFT_SKIP;
+	}
+
+	version = versions[VDSO_VERSION];
+	name = (const char **)&names[VDSO_NAMES];
+
+	printf("[vDSO kselftest] VDSO_VERSION: %s\n", version);
+
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+	ret = vdso_test_gettimeofday();
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+	ret += vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+	ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+	ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+	ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+	ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+	ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+	ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+
+	ret += vdso_test_time();
+
+	if (ret > 0)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
-- 
cgit v1.2.3


From 03f55c7952c92d8577d6e9bc695f3fd20032cfd9 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:43 +0000
Subject: kselftest: Extend vDSO selftest to clock_getres

The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to clock_getres, to verify that the
syscall and the vDSO library function return the same information.

The extension has been used to verify the hrtimer_resoltion fix.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile              |   2 +
 .../selftests/vDSO/vdso_test_clock_getres.c        | 124 +++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_clock_getres.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 2ac7448bd414..375d80c2bff5 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -6,6 +6,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -20,6 +21,7 @@ all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
 $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
+$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
new file mode 100644
index 000000000000..15dcee16ff72
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ * vdso_clock_getres.c: Sample code to test clock_getres.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_clock_getres.c
+ *
+ * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
+ * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
+ * Might work on other architectures.
+ */
+
+#define _GNU_SOURCE
+#include <elf.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+
+static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
+{
+	long ret;
+
+	ret = syscall(SYS_clock_getres, _clkid, _ts);
+
+	return ret;
+}
+
+const char *vdso_clock_name[12] = {
+	"CLOCK_REALTIME",
+	"CLOCK_MONOTONIC",
+	"CLOCK_PROCESS_CPUTIME_ID",
+	"CLOCK_THREAD_CPUTIME_ID",
+	"CLOCK_MONOTONIC_RAW",
+	"CLOCK_REALTIME_COARSE",
+	"CLOCK_MONOTONIC_COARSE",
+	"CLOCK_BOOTTIME",
+	"CLOCK_REALTIME_ALARM",
+	"CLOCK_BOOTTIME_ALARM",
+	"CLOCK_SGI_CYCLE",
+	"CLOCK_TAI",
+};
+
+/*
+ * This function calls clock_getres in vdso and by system call
+ * with different values for clock_id.
+ *
+ * Example of output:
+ *
+ * clock_id: CLOCK_REALTIME [PASS]
+ * clock_id: CLOCK_BOOTTIME [PASS]
+ * clock_id: CLOCK_TAI [PASS]
+ * clock_id: CLOCK_REALTIME_COARSE [PASS]
+ * clock_id: CLOCK_MONOTONIC [PASS]
+ * clock_id: CLOCK_MONOTONIC_RAW [PASS]
+ * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
+ */
+static inline int vdso_test_clock(unsigned int clock_id)
+{
+	struct timespec x, y;
+
+	printf("clock_id: %s", vdso_clock_name[clock_id]);
+	clock_getres(clock_id, &x);
+	syscall_clock_getres(clock_id, &y);
+
+	if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
+		printf(" [FAIL]\n");
+		return KSFT_FAIL;
+	}
+
+	printf(" [PASS]\n");
+	return KSFT_PASS;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+	ret = vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+	ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+	ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+	ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+	ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+	ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+	ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+	if (ret > 0)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
-- 
cgit v1.2.3


From c7e5789b24d36dd5dddd36ea2b99280a606cac42 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:44 +0000
Subject: kselftest: Move test_vdso to the vDSO test suite

Move test_vdso from x86 to the vDSO test suite.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile              |  10 +-
 .../testing/selftests/vDSO/vdso_test_correctness.c | 342 +++++++++++++++++++++
 tools/testing/selftests/x86/Makefile               |   2 +-
 tools/testing/selftests/x86/test_vdso.c            | 342 ---------------------
 4 files changed, 351 insertions(+), 345 deletions(-)
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_correctness.c
 delete mode 100644 tools/testing/selftests/x86/test_vdso.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 375d80c2bff5..d53a4d8008f9 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -7,12 +7,14 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
-ifeq ($(ARCH),x86)
+ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
 
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+LDFLAGS_vdso_test_correctness := -ldl
 ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
@@ -26,4 +28,8 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
 		-o $@
-
+$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
+	$(CC) $(CFLAGS) \
+		vdso_test_correctness.c \
+		-o $@ \
+		$(LDFLAGS_vdso_test_correctness)
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
new file mode 100644
index 000000000000..42052db0f870
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+int nerrs = 0;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
+#ifdef __x86_64__
+	FILE *maps;
+	char line[MAPS_LINE_LEN];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+		return NULL;
+
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
+		char r, x;
+		void *start, *end;
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		/* assume entries are OK, as we test vDSO here not vsyscall */
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("Warning: failed to find vsyscall getcpu\n");
+		return NULL;
+	}
+	return (void *) (0xffffffffff600800);
+#else
+	return NULL;
+#endif
+}
+
+
+static void fill_function_pointers()
+{
+	void *vdso = dlopen("linux-vdso.so.1",
+			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu)
+		printf("Warning: failed to find getcpu in vDSO\n");
+
+	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+		       void* cache)
+{
+	return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
+static void test_getcpu(void)
+{
+	printf("[RUN]\tTesting getcpu...\n");
+
+	for (int cpu = 0; ; cpu++) {
+		cpu_set_t cpuset;
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+			return;
+
+		unsigned cpu_sys, cpu_vdso, cpu_vsys,
+			node_sys, node_vdso, node_vsys;
+		long ret_sys, ret_vdso = 1, ret_vsys = 1;
+		unsigned node;
+
+		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+		if (vdso_getcpu)
+			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+		if (vgetcpu)
+			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+		if (!ret_sys)
+			node = node_sys;
+		else if (!ret_vdso)
+			node = node_vdso;
+		else if (!ret_vsys)
+			node = node_vsys;
+
+		bool ok = true;
+		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+			ok = false;
+		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+			ok = false;
+		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+			ok = false;
+
+		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+		if (!ret_sys)
+			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+		if (!ret_vdso)
+			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+		if (!ret_vsys)
+			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+			       node_vsys);
+		printf("\n");
+
+		if (!ok)
+			nerrs++;
+	}
+}
+
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	if (!vdso_clock_gettime) {
+		printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
+		return;
+	}
+
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
+int main(int argc, char **argv)
+{
+	fill_function_pointers();
+
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
+	test_getcpu();
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 6703c7906b71..333980375bc7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl ioperm \
-			test_vdso test_vsyscall mov_ss_trap \
+			test_vsyscall mov_ss_trap \
 			syscall_arg_fault fsgsbase_restore
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
deleted file mode 100644
index 42052db0f870..000000000000
--- a/tools/testing/selftests/x86/test_vdso.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ldt_gdt.c - Test cases for LDT and GDT access
- * Copyright (c) 2011-2015 Andrew Lutomirski
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <sys/time.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <dlfcn.h>
-#include <string.h>
-#include <errno.h>
-#include <sched.h>
-#include <stdbool.h>
-#include <limits.h>
-
-#ifndef SYS_getcpu
-# ifdef __x86_64__
-#  define SYS_getcpu 309
-# else
-#  define SYS_getcpu 318
-# endif
-#endif
-
-/* max length of lines in /proc/self/maps - anything longer is skipped here */
-#define MAPS_LINE_LEN 128
-
-int nerrs = 0;
-
-typedef int (*vgettime_t)(clockid_t, struct timespec *);
-
-vgettime_t vdso_clock_gettime;
-
-typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
-
-vgtod_t vdso_gettimeofday;
-
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-getcpu_t vgetcpu;
-getcpu_t vdso_getcpu;
-
-static void *vsyscall_getcpu(void)
-{
-#ifdef __x86_64__
-	FILE *maps;
-	char line[MAPS_LINE_LEN];
-	bool found = false;
-
-	maps = fopen("/proc/self/maps", "r");
-	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
-		return NULL;
-
-	while (fgets(line, MAPS_LINE_LEN, maps)) {
-		char r, x;
-		void *start, *end;
-		char name[MAPS_LINE_LEN];
-
-		/* sscanf() is safe here as strlen(name) >= strlen(line) */
-		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
-			   &start, &end, &r, &x, name) != 5)
-			continue;
-
-		if (strcmp(name, "[vsyscall]"))
-			continue;
-
-		/* assume entries are OK, as we test vDSO here not vsyscall */
-		found = true;
-		break;
-	}
-
-	fclose(maps);
-
-	if (!found) {
-		printf("Warning: failed to find vsyscall getcpu\n");
-		return NULL;
-	}
-	return (void *) (0xffffffffff600800);
-#else
-	return NULL;
-#endif
-}
-
-
-static void fill_function_pointers()
-{
-	void *vdso = dlopen("linux-vdso.so.1",
-			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
-	if (!vdso)
-		vdso = dlopen("linux-gate.so.1",
-			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
-	if (!vdso) {
-		printf("[WARN]\tfailed to find vDSO\n");
-		return;
-	}
-
-	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
-	if (!vdso_getcpu)
-		printf("Warning: failed to find getcpu in vDSO\n");
-
-	vgetcpu = (getcpu_t) vsyscall_getcpu();
-
-	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
-	if (!vdso_clock_gettime)
-		printf("Warning: failed to find clock_gettime in vDSO\n");
-
-	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
-	if (!vdso_gettimeofday)
-		printf("Warning: failed to find gettimeofday in vDSO\n");
-
-}
-
-static long sys_getcpu(unsigned * cpu, unsigned * node,
-		       void* cache)
-{
-	return syscall(__NR_getcpu, cpu, node, cache);
-}
-
-static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
-{
-	return syscall(__NR_clock_gettime, id, ts);
-}
-
-static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-	return syscall(__NR_gettimeofday, tv, tz);
-}
-
-static void test_getcpu(void)
-{
-	printf("[RUN]\tTesting getcpu...\n");
-
-	for (int cpu = 0; ; cpu++) {
-		cpu_set_t cpuset;
-		CPU_ZERO(&cpuset);
-		CPU_SET(cpu, &cpuset);
-		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
-			return;
-
-		unsigned cpu_sys, cpu_vdso, cpu_vsys,
-			node_sys, node_vdso, node_vsys;
-		long ret_sys, ret_vdso = 1, ret_vsys = 1;
-		unsigned node;
-
-		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
-		if (vdso_getcpu)
-			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
-		if (vgetcpu)
-			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
-
-		if (!ret_sys)
-			node = node_sys;
-		else if (!ret_vdso)
-			node = node_vdso;
-		else if (!ret_vsys)
-			node = node_vsys;
-
-		bool ok = true;
-		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
-			ok = false;
-		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
-			ok = false;
-		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
-			ok = false;
-
-		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
-		if (!ret_sys)
-			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
-		if (!ret_vdso)
-			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
-		if (!ret_vsys)
-			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
-			       node_vsys);
-		printf("\n");
-
-		if (!ok)
-			nerrs++;
-	}
-}
-
-static bool ts_leq(const struct timespec *a, const struct timespec *b)
-{
-	if (a->tv_sec != b->tv_sec)
-		return a->tv_sec < b->tv_sec;
-	else
-		return a->tv_nsec <= b->tv_nsec;
-}
-
-static bool tv_leq(const struct timeval *a, const struct timeval *b)
-{
-	if (a->tv_sec != b->tv_sec)
-		return a->tv_sec < b->tv_sec;
-	else
-		return a->tv_usec <= b->tv_usec;
-}
-
-static char const * const clocknames[] = {
-	[0] = "CLOCK_REALTIME",
-	[1] = "CLOCK_MONOTONIC",
-	[2] = "CLOCK_PROCESS_CPUTIME_ID",
-	[3] = "CLOCK_THREAD_CPUTIME_ID",
-	[4] = "CLOCK_MONOTONIC_RAW",
-	[5] = "CLOCK_REALTIME_COARSE",
-	[6] = "CLOCK_MONOTONIC_COARSE",
-	[7] = "CLOCK_BOOTTIME",
-	[8] = "CLOCK_REALTIME_ALARM",
-	[9] = "CLOCK_BOOTTIME_ALARM",
-	[10] = "CLOCK_SGI_CYCLE",
-	[11] = "CLOCK_TAI",
-};
-
-static void test_one_clock_gettime(int clock, const char *name)
-{
-	struct timespec start, vdso, end;
-	int vdso_ret, end_ret;
-
-	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
-
-	if (sys_clock_gettime(clock, &start) < 0) {
-		if (errno == EINVAL) {
-			vdso_ret = vdso_clock_gettime(clock, &vdso);
-			if (vdso_ret == -EINVAL) {
-				printf("[OK]\tNo such clock.\n");
-			} else {
-				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
-				nerrs++;
-			}
-		} else {
-			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
-		}
-		return;
-	}
-
-	vdso_ret = vdso_clock_gettime(clock, &vdso);
-	end_ret = sys_clock_gettime(clock, &end);
-
-	if (vdso_ret != 0 || end_ret != 0) {
-		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
-		       vdso_ret, errno);
-		nerrs++;
-		return;
-	}
-
-	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
-	       (unsigned long long)start.tv_sec, start.tv_nsec,
-	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
-	       (unsigned long long)end.tv_sec, end.tv_nsec);
-
-	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
-		printf("[FAIL]\tTimes are out of sequence\n");
-		nerrs++;
-	}
-}
-
-static void test_clock_gettime(void)
-{
-	if (!vdso_clock_gettime) {
-		printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
-		return;
-	}
-
-	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-	     clock++) {
-		test_one_clock_gettime(clock, clocknames[clock]);
-	}
-
-	/* Also test some invalid clock ids */
-	test_one_clock_gettime(-1, "invalid");
-	test_one_clock_gettime(INT_MIN, "invalid");
-	test_one_clock_gettime(INT_MAX, "invalid");
-}
-
-static void test_gettimeofday(void)
-{
-	struct timeval start, vdso, end;
-	struct timezone sys_tz, vdso_tz;
-	int vdso_ret, end_ret;
-
-	if (!vdso_gettimeofday)
-		return;
-
-	printf("[RUN]\tTesting gettimeofday...\n");
-
-	if (sys_gettimeofday(&start, &sys_tz) < 0) {
-		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
-		nerrs++;
-		return;
-	}
-
-	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
-	end_ret = sys_gettimeofday(&end, NULL);
-
-	if (vdso_ret != 0 || end_ret != 0) {
-		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
-		       vdso_ret, errno);
-		nerrs++;
-		return;
-	}
-
-	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
-	       (unsigned long long)start.tv_sec, start.tv_usec,
-	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
-	       (unsigned long long)end.tv_sec, end.tv_usec);
-
-	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
-		printf("[FAIL]\tTimes are out of sequence\n");
-		nerrs++;
-	}
-
-	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
-	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
-		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
-		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
-	} else {
-		printf("[FAIL]\ttimezones do not match\n");
-		nerrs++;
-	}
-
-	/* And make sure that passing NULL for tz doesn't crash. */
-	vdso_gettimeofday(&vdso, NULL);
-}
-
-int main(int argc, char **argv)
-{
-	fill_function_pointers();
-
-	test_clock_gettime();
-	test_gettimeofday();
-
-	/*
-	 * Test getcpu() last so that, if something goes wrong setting affinity,
-	 * we still run the other tests.
-	 */
-	test_getcpu();
-
-	return nerrs ? 1 : 0;
-}
-- 
cgit v1.2.3


From b2f1c3db28870d88d1a19aa86a8374e7725d62c5 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:45 +0000
Subject: kselftest: Extend vdso correctness test to clock_gettime64

With the release of Linux 5.1 has been added a new syscall,
clock_gettime64, that provided a 64 bit time value for a specified
clock_ID to make the kernel Y2038 safe on 32 bit architectures.

Extend the vdso correctness test to cover the newly exposed vdso
function.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/vdso_config.h         |   4 +-
 .../testing/selftests/vDSO/vdso_test_correctness.c | 115 ++++++++++++++++++++-
 2 files changed, 115 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index eeb725df6045..6a6fe8d4ff55 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -66,12 +66,13 @@ static const char *versions[6] = {
 	"LINUX_4.15",
 };
 
-static const char *names[2][5] = {
+static const char *names[2][6] = {
 	{
 		"__kernel_gettimeofday",
 		"__kernel_clock_gettime",
 		"__kernel_time",
 		"__kernel_clock_getres",
+		"__kernel_getcpu",
 #if defined(VDSO_32BIT)
 		"__kernel_clock_gettime64",
 #endif
@@ -81,6 +82,7 @@ static const char *names[2][5] = {
 		"__vdso_clock_gettime",
 		"__vdso_time",
 		"__vdso_clock_getres",
+		"__vdso_getcpu",
 #if defined(VDSO_32BIT)
 		"__vdso_clock_gettime64",
 #endif
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 42052db0f870..5029ef9b228c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -19,6 +19,10 @@
 #include <stdbool.h>
 #include <limits.h>
 
+#include "vdso_config.h"
+
+static const char **name;
+
 #ifndef SYS_getcpu
 # ifdef __x86_64__
 #  define SYS_getcpu 309
@@ -27,6 +31,17 @@
 # endif
 #endif
 
+#ifndef __NR_clock_gettime64
+#define __NR_clock_gettime64	403
+#endif
+
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+	long long	tv_sec;
+	long long	tv_nsec;
+};
+#endif
+
 /* max length of lines in /proc/self/maps - anything longer is skipped here */
 #define MAPS_LINE_LEN 128
 
@@ -36,6 +51,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *);
 
 vgettime_t vdso_clock_gettime;
 
+typedef int (*vgettime64_t)(clockid_t, struct __kernel_timespec *);
+
+vgettime64_t vdso_clock_gettime64;
+
 typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
 
 vgtod_t vdso_gettimeofday;
@@ -99,17 +118,23 @@ static void fill_function_pointers()
 		return;
 	}
 
-	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]);
 	if (!vdso_getcpu)
 		printf("Warning: failed to find getcpu in vDSO\n");
 
 	vgetcpu = (getcpu_t) vsyscall_getcpu();
 
-	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]);
 	if (!vdso_clock_gettime)
 		printf("Warning: failed to find clock_gettime in vDSO\n");
 
-	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+#if defined(VDSO_32BIT)
+	vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]);
+	if (!vdso_clock_gettime64)
+		printf("Warning: failed to find clock_gettime64 in vDSO\n");
+#endif
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]);
 	if (!vdso_gettimeofday)
 		printf("Warning: failed to find gettimeofday in vDSO\n");
 
@@ -126,6 +151,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
 	return syscall(__NR_clock_gettime, id, ts);
 }
 
+static inline int sys_clock_gettime64(clockid_t id, struct __kernel_timespec *ts)
+{
+	return syscall(__NR_clock_gettime64, id, ts);
+}
+
 static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	return syscall(__NR_gettimeofday, tv, tz);
@@ -191,6 +221,15 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b)
 		return a->tv_nsec <= b->tv_nsec;
 }
 
+static bool ts64_leq(const struct __kernel_timespec *a,
+		     const struct __kernel_timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
 static bool tv_leq(const struct timeval *a, const struct timeval *b)
 {
 	if (a->tv_sec != b->tv_sec)
@@ -254,7 +293,10 @@ static void test_one_clock_gettime(int clock, const char *name)
 	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
 		printf("[FAIL]\tTimes are out of sequence\n");
 		nerrs++;
+		return;
 	}
+
+	printf("[OK]\tTest Passed.\n");
 }
 
 static void test_clock_gettime(void)
@@ -275,6 +317,70 @@ static void test_clock_gettime(void)
 	test_one_clock_gettime(INT_MAX, "invalid");
 }
 
+static void test_one_clock_gettime64(int clock, const char *name)
+{
+	struct __kernel_timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime64 for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime64(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime64(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime64 returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime64(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime64(clock, &vdso);
+	end_ret = sys_clock_gettime64(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts64_leq(&start, &vdso) || !ts64_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+		return;
+	}
+
+	printf("[OK]\tTest Passed.\n");
+}
+
+static void test_clock_gettime64(void)
+{
+	if (!vdso_clock_gettime64) {
+		printf("[SKIP]\tNo vDSO, so skipping clock_gettime64() tests\n");
+		return;
+	}
+
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime64(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime64(-1, "invalid");
+	test_one_clock_gettime64(INT_MIN, "invalid");
+	test_one_clock_gettime64(INT_MAX, "invalid");
+}
+
 static void test_gettimeofday(void)
 {
 	struct timeval start, vdso, end;
@@ -327,9 +433,12 @@ static void test_gettimeofday(void)
 
 int main(int argc, char **argv)
 {
+	name = (const char **)&names[VDSO_NAMES];
+
 	fill_function_pointers();
 
 	test_clock_gettime();
+	test_clock_gettime64();
 	test_gettimeofday();
 
 	/*
-- 
cgit v1.2.3


From 7afc9d8f82906754e16fce560fb4e9733bb9b75e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:19 +0200
Subject: selftests: net: bridge: rename current igmp tests to igmpv2

To prepare the bridge_igmp.sh for IGMPv3 we need to rename the
current test to IGMPv2.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 88d2472ba151..481198300b72 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="reportleave_test"
+ALL_TESTS="v2reportleave_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -110,7 +110,7 @@ mcast_packet_test()
 	return $seen
 }
 
-reportleave_test()
+v2reportleave_test()
 {
 	RET=0
 	ip address add dev $h2 $TEST_GROUP/32 autojoin
@@ -118,12 +118,12 @@ reportleave_test()
 
 	sleep 5
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
-	check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+	check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
 
 	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
 	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
 
-	log_test "IGMP report $TEST_GROUP"
+	log_test "IGMPv2 report $TEST_GROUP"
 
 	RET=0
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
@@ -139,7 +139,7 @@ reportleave_test()
 	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
 	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
 
-	log_test "IGMP leave $TEST_GROUP"
+	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
 trap cleanup EXIT
-- 
cgit v1.2.3


From 79ae3e256aa1cfaa801e23a13b7f9e1a49cacb20 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:20 +0200
Subject: selftests: net: bridge: igmp: add support for packet source address

Add support for one more argument which specifies the source address to
use. It will be later used for IGMPv3 S,G entry testing.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 481198300b72..1c19459dbc58 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -83,9 +83,10 @@ cleanup()
 mcast_packet_test()
 {
 	local mac=$1
-	local ip=$2
-	local host1_if=$3
-	local host2_if=$4
+	local src_ip=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
 	local seen=0
 
 	# Add an ACL on `host2_if` which will tell us whether the packet
@@ -94,7 +95,7 @@ mcast_packet_test()
 	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
 		flower dst_mac $mac action drop
 
-	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
+	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
 
 	tc -j -s filter show dev $host2_if ingress \
@@ -120,7 +121,7 @@ v2reportleave_test()
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
 	check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
 
 	log_test "IGMPv2 report $TEST_GROUP"
@@ -136,7 +137,7 @@ v2reportleave_test()
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
 	check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
 
 	log_test "IGMPv2 leave $TEST_GROUP"
-- 
cgit v1.2.3


From f0e260db4c9e0576b2092f30bddd6816f9d37383 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:21 +0200
Subject: selftests: net: bridge: igmp: check for specific udp ip protocol

We have to specifically check for udp protocol in addition to the mac
address because in IGMPv3 tests group-specific queries will use the same
mac address.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 1c19459dbc58..5562aef14c0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -93,7 +93,7 @@ mcast_packet_test()
 	# was received by it or not.
 	tc qdisc add dev $host2_if ingress
 	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
-		flower dst_mac $mac action drop
+		flower ip_proto udp dst_mac $mac action drop
 
 	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
-- 
cgit v1.2.3


From 68d3163a4b7e50bc8e9bbf689d55174fdcd44fa5 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:22 +0200
Subject: selftests: net: bridge: igmp: add IGMPv3 entries' state helpers

Add helpers which will be used in subsequent tests, they are:
 - check_sg_entries: check for proper source list and S,G entry
   existence
 - check_sg_fwding: check for proper traffic forwarding/blocking
 - check_sg_state: check for proper blocked/forwarding entry state

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 67 ++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 5562aef14c0a..19c1f46d1151 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -143,6 +143,73 @@ v2reportleave_test()
 	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
+check_sg_entries()
+{
+	local report=$1; shift
+	local slist=("$@")
+	local sarg=""
+
+	for src in "${slist[@]}"; do
+		sarg="${sarg} and .source_list[].address == \"$src\""
+	done
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+	check_err $? "Wrong *,G entry source list after $report report"
+
+	for sgent in "${slist[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+	done
+}
+
+check_sg_fwding()
+{
+	local should_fwd=$1; shift
+	local sources=("$@")
+
+	for src in "${sources[@]}"; do
+		local retval=0
+
+		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+		retval=$?
+		if [ $should_fwd -eq 1 ]; then
+			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+		else
+			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+		fi
+	done
+}
+
+check_sg_state()
+{
+	local is_blocked=$1; shift
+	local sources=("$@")
+	local should_fail=1
+
+	if [ $is_blocked -eq 1 ]; then
+		should_fail=0
+	fi
+
+	for src in "${sources[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+				 .source_list[] |
+				 select(.address == \"$src\") |
+				 select(.timer == \"0.00\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has zero timer"
+
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+				 .flags[] == \"blocked\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has blocked flag"
+	done
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 98ae11cf8104a27fcd3dddaed4714be0600df419 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:23 +0200
Subject: selftests: net: bridge: add tests for igmpv3 is_include and inc ->
 allow reports

First we test is_include/include mode then we build on that with allow
effectively achieving:
  state          report        result                 action
 INCLUDE (A)    ALLOW (B)    INCLUDE (A+B)           (B)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 82 +++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 19c1f46d1151..e9999e346ea6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,11 +1,20 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
 TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+ALL_GROUP="224.0.0.1"
+ALL_MAC="01:00:5e:00:00:01"
+
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
+MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+
 source lib.sh
 
 h1_create()
@@ -210,6 +219,77 @@ check_sg_state()
 	done
 }
 
+v3include_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	ip link set dev br0 type bridge mcast_igmp_version 3
+	check_err $? "Could not change bridge IGMP version to 3"
+
+	$MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	check_sg_entries "is_include" "${X[@]}"
+}
+
+v3cleanup()
+{
+	local port=$1
+	local group=$2
+
+	bridge mdb del dev br0 port $port grp $group
+	ip link set dev br0 type bridge mcast_igmp_version 2
+}
+
+v3include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
+	sleep 1
+	check_sg_entries "allow" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 47021771064cc99fd106783ddc698b76684ec3f0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:24 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> is_include report

The test checks for the following case:
   state          report        result                 action
 INCLUDE (A)    IS_IN (B)     INCLUDE (A+B)            (B)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 25 +++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e9999e346ea6..added5c69e8b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -12,6 +12,8 @@ ALL_MAC="01:00:5e:00:00:01"
 
 # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
 MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 
@@ -290,6 +292,27 @@ v3inc_allow_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
+	sleep 1
+	check_sg_entries "is_include" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 3c8b9fdad00481dfb0ca4ce81a5fec6c18fd77bc Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:25 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> is_exclude report

The test checks for the following case:
   state          report        result                 action
  INCLUDE (A)    IS_EX (B)     EXCLUDE (A*B,B-A)      (B-A)=0
                                                      Delete (A-B)
                                                      Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 41 +++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index added5c69e8b..34d2c4370fa6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
+	   v3inc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -16,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
+MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
 
 source lib.sh
 
@@ -313,6 +316,42 @@ v3inc_is_include_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_is_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 735af7bec0f128e67192512854db459f61b1c278 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:26 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> to_exclude report

The test checks for the following case:
  state          report        result                  action
 INCLUDE (A)    TO_EX (B)     EXCLUDE (A*B,B-A)       (B-A)=0
                                                      Delete (A-B)
                                                      Send Q(G,A*B)
                                                      Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 51 +++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 34d2c4370fa6..36f10a3168cc 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -19,6 +19,8 @@ MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
 source lib.sh
 
@@ -352,6 +354,53 @@ v3inc_is_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1")
+	local Y=("192.0.2.20" "192.0.2.30")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From eecd8cfdff1b9e437ca9162d058de0cee68c1fe6 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:27 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> allow report

The test checks for the following case:
   state          report        result                  action
  EXCLUDE (X,Y)  ALLOW (A)     EXCLUDE (X+A,Y-A)       (A)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 83 +++++++++++++++-------
 1 file changed, 59 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 36f10a3168cc..d786e75abe2c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -17,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
+MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
@@ -250,6 +252,38 @@ v3include_prepare()
 	check_sg_entries "is_include" "${X[@]}"
 }
 
+v3exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("192.0.2.1" "192.0.2.2")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3include_prepare $host1_if $mac $group
+
+	$MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+}
+
 v3cleanup()
 {
 	local port=$1
@@ -321,30 +355,8 @@ v3inc_is_include_test()
 v3inc_is_exclude_test()
 {
 	RET=0
-	local X=("192.0.2.1" "192.0.2.2")
-	local Y=("192.0.2.20" "192.0.2.21")
-
-	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
-
-	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q
-	sleep 1
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and \
-				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
-	check_err $? "Wrong *,G entry filter mode"
-
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
-
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and \
-				.source_list != null and
-				.source_list[].address == \"192.0.2.3\")" &>/dev/null
-	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
 
 	check_sg_fwding 1 "${X[@]}" 192.0.2.100
 	check_sg_fwding 0 "${Y[@]}"
@@ -401,6 +413,29 @@ v3inc_to_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 1
+	check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From e7e7ab7c00c2ecffa64defb54fe938d61fc19d39 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:28 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> is_include report

The test checks for the following case:
    state          report        result                  action
   EXCLUDE (X,Y)  IS_IN (A)     EXCLUDE (X+A,Y-A)       (A)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 27 +++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index d786e75abe2c..b2b0f7d7e860 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -15,6 +15,8 @@ ALL_MAC="01:00:5e:00:00:01"
 MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
 # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30
+MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
@@ -436,6 +438,29 @@ v3exc_allow_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
+	sleep 1
+	check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 7b4f7138221a483e7642e582518814d579edf36a Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:29 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> is_exclude report

The test checks for the following case:
     state          report        result                  action
   EXCLUDE (X,Y)  IS_EX (A)     EXCLUDE (A-Y,Y*A)        (A-X-Y)=GMI
                                                         Delete (X-A)
                                                         Delete (Y-A)
                                                         Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 28 +++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index b2b0f7d7e860..91b0b26428f6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
+	   v3exc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -23,6 +24,8 @@ MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:0
 MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30
+MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
@@ -461,6 +464,29 @@ v3exc_is_include_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_is_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
+	sleep 1
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 65bfc146ab95ad980134fe206467d1d7108e402e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:30 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> to_exclude report

The test checks for the following case:
    state          report        result                  action
  EXCLUDE (X,Y)  TO_EX (A)     EXCLUDE (A-Y,Y*A)        (A-X-Y)=Group Timer
                                                        Delete (X-A)
                                                        Delete (Y-A)
                                                        Send Q(G,A-Y)
                                                        Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 91b0b26428f6..3cfc30b88285 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -487,6 +487,34 @@ v3exc_is_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 80899f1b1c05a07f907bc54d6dc5bdadb37ab4f1 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:31 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> block report

The test checks for the following case:
    state          report        result                  action
  INCLUDE (A)    BLOCK (B)     INCLUDE (A)              Send Q(G,A*B)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 33 +++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 3cfc30b88285..3772c7a066c9 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -28,6 +28,8 @@ MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
 source lib.sh
 
@@ -515,6 +517,35 @@ v3exc_to_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_block_test()
+{
+	RET=0
+	local X=("192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	check_sg_entries "block" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> block"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 9eb58e07470bfb5a1c1d4ae08806b82d662171f7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:32 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> block report

The test checks for the following case:
   state          report        result                  action
 EXCLUDE (X,Y)  BLOCK (A)     EXCLUDE (X+(A-Y),Y)      (A-X-Y)=Group Timer
                                                       Send Q(G,A-Y)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 3772c7a066c9..45c5619666d8 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -546,6 +546,34 @@ v3inc_block_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_block_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.30")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	sleep 1
+	check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 18f66c96ea585ca7bdd5c75eae3077566b0d73c0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:33 +0200
Subject: selftests: net: bridge: add test for igmpv3 exclude timeout

Test that when a group in exclude mode expires it changes mode to
include and the blocked entries are deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 49 +++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 45c5619666d8..db0a03e30868 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
+	   v3exc_timeout_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -574,6 +575,52 @@ v3exc_block_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_timeout_test()
+{
+	RET=0
+	local X=("192.0.2.20" "192.0.2.30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+
+	check_sg_entries "allow" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 192.0.2.100
+
+	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 414ea3754149847ec9491de9e9923750f5447331 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:34 +0200
Subject: selftests: net: bridge: add test for igmpv3 *,g auto-add

When we have *,G ports in exclude mode and a new S,G,port is added
the kernel has to automatically create an S,G entry for each exclude
port to get proper forwarding.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index db0a03e30868..0e71abdd7a03 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
 	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
-	   v3exc_timeout_test"
+	   v3exc_timeout_test v3star_ex_auto_add_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -621,6 +621,35 @@ v3exc_timeout_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3star_ex_auto_add_test()
+{
+	RET=0
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	check_sg_fwding 1 192.0.2.3
+
+	log_test "IGMPv3 S,G port entry automatic add to a *,G port"
+
+	v3cleanup $swp1 $TEST_GROUP
+	v3cleanup $swp2 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 7a60c2dd0f575ab14a457e99582af0ca1e072a74 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 28 Oct 2020 16:15:26 -0300
Subject: drm: Remove SCATTERLIST_MAX_SEGMENT

Since commit 9a40401cfa13 ("lib/scatterlist: Do not limit max_segment to
PAGE_ALIGNED values") the max_segment input to sg_alloc_table_from_pages()
does not have to be any special value. The new algorithm will always
create something less than what the user provides. Thus eliminate this
confusing constant.

- vmwgfx should use the HW capability, not mix in the OS page size for
  calling dma_set_max_seg_size()

- i915 uses i915_sg_segment_size() both for sg_alloc_table_from_pages
  and for some open coded sgl construction. This doesn't change the value
  since rounddown(size, UINT_MAX) == SCATTERLIST_MAX_SEGMENT

- drm_prime_pages_to_sg uses it as a default if max_segment is zero,
  UINT_MAX is fine to use directly.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Qian Cai <cai@lca.pw>
Cc: "Ursulin, Tvrtko" <tvrtko.ursulin@intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/0-v1-44733fccd781+13d-rm_scatterlist_max_jgg@nvidia.com
---
 tools/testing/scatterlist/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index b2c7e9f7b8d3..d264bf853034 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -50,7 +50,7 @@ static void fail(struct test *test, struct sg_table *st, const char *cond)
 
 int main(void)
 {
-	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
+	const unsigned int sgmax = UINT_MAX;
 	struct test *test, tests[] = {
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-- 
cgit v1.2.3


From 338b5da31de0d816b5718dad0e09482a27d51504 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 29 Oct 2020 21:09:31 +0200
Subject: selftests/net: timestamping: add ptp v2 support

The timestamping tool is supporting now only PTPv1 (IEEE-1588 2002) while
modern HW often supports also/only PTPv2.

Hence timestamping tool is still useful for sanity testing of PTP drivers
HW timestamping capabilities it's reasonable to upstate it to support
PTPv2. This patch adds corresponding support which can be enabled by using
new parameter "PTPV2".

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20201029190931.30883-1-grygorii.strashko@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/timestamping.c | 47 +++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index f4bb4fef0f39..21091be70688 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -59,7 +59,8 @@ static void usage(const char *error)
 	       "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
 	       "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
 	       "  SIOCGSTAMP - check last socket time stamp\n"
-	       "  SIOCGSTAMPNS - more accurate socket time stamp\n");
+	       "  SIOCGSTAMPNS - more accurate socket time stamp\n"
+	       "  PTPV2 - use PTPv2 messages\n");
 	exit(1);
 }
 
@@ -115,13 +116,28 @@ static const unsigned char sync[] = {
 	0x00, 0x00, 0x00, 0x00
 };
 
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+static const unsigned char sync_v2[] = {
+	0x00, 0x02, 0x00, 0x2C,
+	0x00, 0x00, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0xFF,
+	0xFE, 0x00, 0x00, 0x00,
+	0x00, 0x01, 0x00, 0x01,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
 {
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct timeval now;
 	int res;
 
-	res = sendto(sock, sync, sizeof(sync), 0,
-		addr, addr_len);
+	res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
 	gettimeofday(&now, 0);
 	if (res < 0)
 		printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
 static void printpacket(struct msghdr *msg, int res,
 			char *data,
 			int sock, int recvmsg_flags,
-			int siocgstamp, int siocgstampns)
+			int siocgstamp, int siocgstampns, int ptpv2)
 {
 	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct cmsghdr *cmsg;
 	struct timeval tv;
 	struct timespec ts;
@@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res,
 					"probably SO_EE_ORIGIN_TIMESTAMPING"
 #endif
 					);
-				if (res < sizeof(sync))
+				if (res < sync_len)
 					printf(" => truncated data?!");
-				else if (!memcmp(sync, data + res - sizeof(sync),
-							sizeof(sync)))
+				else if (!memcmp(sync_p, data + res - sync_len, sync_len))
 					printf(" => GOT OUR DATA BACK (HURRAY!)");
 				break;
 			}
@@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res,
 }
 
 static void recvpacket(int sock, int recvmsg_flags,
-		       int siocgstamp, int siocgstampns)
+		       int siocgstamp, int siocgstampns, int ptpv2)
 {
 	char data[256];
 	struct msghdr msg;
@@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags,
 	} else {
 		printpacket(&msg, res, data,
 			    sock, recvmsg_flags,
-			    siocgstamp, siocgstampns);
+			    siocgstamp, siocgstampns, ptpv2);
 	}
 }
 
@@ -300,6 +317,7 @@ int main(int argc, char **argv)
 	int siocgstamp = 0;
 	int siocgstampns = 0;
 	int ip_multicast_loop = 0;
+	int ptpv2 = 0;
 	char *interface;
 	int i;
 	int enabled = 1;
@@ -335,6 +353,8 @@ int main(int argc, char **argv)
 			siocgstampns = 1;
 		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
 			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "PTPV2"))
+			ptpv2 = 1;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
 			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
@@ -369,6 +389,7 @@ int main(int argc, char **argv)
 		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
 	hwconfig.rx_filter =
 		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
 		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
 	hwconfig_requested = hwconfig;
 	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -496,16 +517,16 @@ int main(int argc, char **argv)
 					printf("has error\n");
 				recvpacket(sock, 0,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 				recvpacket(sock, MSG_ERRQUEUE,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 			}
 		} else {
 			/* write one packet */
 			sendpacket(sock,
 				   (struct sockaddr *)&addr,
-				   sizeof(addr));
+				   sizeof(addr), ptpv2);
 			next.tv_sec += 5;
 			continue;
 		}
-- 
cgit v1.2.3


From bbbc7aa45eefd4ef7ffbd5ee3bb49bd8b68a2213 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 30 Oct 2020 21:10:54 +0100
Subject: selftests: add test script for bareudp tunnels

Test different encapsulation modes of the bareudp module:
  * Unicast MPLS,
  * IPv4 only,
  * IPv4 in multiproto mode (that is, IPv4 and IPv6),
  * IPv6.

Each mode is tested with both an IPv4 and an IPv6 underlay.

v2:
  * Add build dependencies in config file (Willem de Bruijn).
  * The MPLS test now uses its own IP addresses. This minimises
    the amount of cleanup between tests and simplifies the script.
  * Verify that iproute2 supports bareudp tunnels before running the
    script (and other minor usability improvements).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/8abc0e58f8a7eeb404f82466505a73110bc43ab8.1604088587.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile   |   1 +
 tools/testing/selftests/net/bareudp.sh | 538 +++++++++++++++++++++++++++++++++
 tools/testing/selftests/net/config     |   7 +
 3 files changed, 546 insertions(+)
 create mode 100755 tools/testing/selftests/net/bareudp.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ef352477cac6..fa5fa425d148 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh
 TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
+TEST_PROGS += bareudp.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
new file mode 100755
index 000000000000..c6fe22de7d0e
--- /dev/null
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -0,0 +1,538 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test various bareudp tunnel configurations.
+#
+# The bareudp module allows to tunnel network protocols like IP or MPLS over
+# UDP, without adding any intermediate header. This scripts tests several
+# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting
+# IPv4, IPv6 or MPLS packets on the overlay).
+#
+# Network topology:
+#
+#   * A chain of 4 network namespaces, connected with veth pairs. Each veth
+#     is assigned an IPv4 and an IPv6 address. A host-route allows a veth to
+#     join its peer.
+#
+#   * NS0 and NS3 are at the extremities of the chain. They have additional
+#     IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0
+#     and NS3, so that they can communicate using these overlay IP addresses.
+#     For IPv4 and IPv6 reachability tests, the route simply sets the peer's
+#     veth address as gateway. For MPLS reachability tests, an MPLS header is
+#     also pushed before the IP header.
+#
+#   * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to
+#     encapsulate the traffic into UDP.
+#
+# +-----------------------------------------------------------------------+
+# |                                  NS0                                  |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.100/32                                   |
+# |      * IPv6 address: 2001:db8::100/128                                |
+# |      * IPv6 address: 2001:db8::200/128                                |
+# |      * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11            |
+# |      * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11       |
+# |      * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11       |
+# |                    (encapsulated with MPLS label 203)                 |
+# |                                                                       |
+# |   veth01:                                                             |
+# |   ^  * IPv4 address: 192.0.2.10, peer 192.0.2.11/32                   |
+# |   |  * IPv6 address: 2001:db8::10, peer 2001:db8::11/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS1                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth10:                                                             |
+# |      * IPv4 address: 192.0.2.11, peer 192.0.2.10/32                   |
+# |      * IPv6 address: 2001:db8::11, peer 2001:db8::10/128              |
+# |                                                                       |
+# |   bareudp_ns1:                                                        |
+# |      * Encapsulate IP or MPLS packets received on veth10 into UDP     |
+# |        and send the resulting packets through veth12.                 |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth12 and send the inner packets through veth10.  |
+# |                                                                       |
+# |   veth12:                                                             |
+# |   ^  * IPv4 address: 192.0.2.21, peer 192.0.2.22/32                   |
+# |   |  * IPv6 address: 2001:db8::21, peer 2001:db8::22/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test), over UDP
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS2                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth21:                                                             |
+# |      * IPv4 address: 192.0.2.22, peer 192.0.2.21/32                   |
+# |      * IPv6 address: 2001:db8::22, peer 2001:db8::21/128              |
+# |                                                                       |
+# |   bareudp_ns2:                                                        |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth21 and send the inner packets through veth23.  |
+# |      * Encapsulate IP or MPLS packets received on veth23 into UDP     |
+# |        and send the resulting packets through veth21.                 |
+# |                                                                       |
+# |   veth23:                                                             |
+# |   ^  * IPv4 address: 192.0.2.32, peer 192.0.2.33/32                   |
+# |   |  * IPv6 address: 2001:db8::32, peer 2001:db8::33/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS3                                  |
+# |   v                                                                   |
+# |   veth32:                                                             |
+# |      * IPv4 address: 192.0.2.33, peer 192.0.2.32/32                   |
+# |      * IPv6 address: 2001:db8::33, peer 2001:db8::32/128              |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.103/32                                   |
+# |      * IPv6 address: 2001:db8::103/128                                |
+# |      * IPv6 address: 2001:db8::203/128                                |
+# |      * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32            |
+# |      * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32       |
+# |      * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32       |
+# |                    (encapsulated with MPLS label 200)                 |
+# |                                                                       |
+# +-----------------------------------------------------------------------+
+
+ERR=4 # Return 4 by default, which is the SKIP code for kselftest
+PING6="ping"
+PAUSE_ON_FAIL="no"
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+readonly NS2=$(mktemp -u ns2-XXXXXXXX)
+readonly NS3=$(mktemp -u ns3-XXXXXXXX)
+
+# Exit the script after having removed the network namespaces it created
+#
+# Parameters:
+#
+#   * The list of network namespaces to delete before exiting.
+#
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	if [ "${ERR}" -eq 4 ]; then
+		echo "Error: Setting up the testing environment failed." >&2
+	fi
+
+	exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+	ip netns add "${NS0}" || exit_cleanup
+	ip netns add "${NS1}" || exit_cleanup "${NS0}"
+	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+#   * $1: the netns the network interface resides in,
+#   * $2: the network interface name,
+#   * $3: the local IPv4 address to assign to this interface,
+#   * $4: the IPv4 address of the remote network interface,
+#   * $5: the local IPv6 address to assign to this interface,
+#   * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+	local NS="${1}"; readonly NS
+	local DEV="${2}"; readonly DEV
+	local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+	local PEER_IP4="${4}"; readonly PEER_IP4
+	local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+	local PEER_IP6="${6}"; readonly PEER_IP6
+
+	ip -netns "${NS}" link set dev "${DEV}" up
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+#   * set up the loopback device in all network namespaces (NS0..NS3),
+#   * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+#     NS1 with NS2, etc.),
+#   * add and IPv4 and an IPv6 address on each veth interface,
+#   * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+	iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+	iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+	iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+	iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+	iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+	iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+	tc -netns "${NS1}" qdisc add dev veth10 ingress
+	tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+#   * add an overlay IP address on the loopback interface of each edge
+#     namespace,
+#   * route these IP addresses via the intermediate namespaces (for the MPLS
+#     tests, this is also where MPLS encapsulation is done),
+#   * add routes for these IP addresses (or MPLS labels) in the intermediate
+#     namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
+
+setup_overlay_ipv4()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 192.0.2.100/32 dev lo
+	ip -netns "${NS3}" address add 192.0.2.103/32 dev lo
+	ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11
+	ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
+	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
+	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+}
+
+setup_overlay_ipv6()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 2001:db8::100/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::103/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10
+	ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33
+}
+
+setup_overlay_mpls()
+{
+	# Add specific overlay IP addresses, routed over MPLS
+	ip -netns "${NS0}" address add 2001:db8::200/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::203/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32
+
+	# Route the MPLS packets in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256
+	ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256
+	ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10
+	ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33
+}
+
+# Run "ping" from NS0 and print the result
+#
+# Parameters:
+#
+#   * $1: the variant of ping to use (normally either "ping" or "ping6"),
+#   * $2: the IP address to ping,
+#   * $3: a human readable description of the purpose of the test.
+#
+# If the test fails and PAUSE_ON_FAIL is active, the user is given the
+# possibility to continue with the next test or to quit immediately.
+#
+ping_test_one()
+{
+	local PING="$1"; readonly PING
+	local IP="$2"; readonly IP
+	local MSG="$3"; readonly MSG
+	local RET
+
+	printf "TEST: %-60s  " "${MSG}"
+
+	set +e
+	ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1
+	RET=$?
+	set -e
+
+	if [ "${RET}" -eq 0 ]; then
+		printf "[ OK ]\n"
+	else
+		ERR=1
+		printf "[FAIL]\n"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			printf "\nHit enter to continue, 'q' to quit\n"
+			read a
+			if [ "$a" = "q" ]; then
+				exit 1
+			fi
+		fi
+	fi
+}
+
+# Run reachability tests
+#
+# Parameters:
+#
+#   * $1: human readable string describing the underlay protocol.
+#
+# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling
+# function.
+#
+ping_test()
+{
+	local UNDERLAY="$1"; readonly UNDERLAY
+	local MODE
+	local MSG
+
+	if [ "${MULTIPROTO}" = "multiproto" ]; then
+		MODE=" (multiproto mode)"
+	else
+		MODE=""
+	fi
+
+	if [ $IPV4 ]; then
+		ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $IPV6 ]; then
+		ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $MPLS_UC ]; then
+		ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}"
+	fi
+}
+
+# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6
+#
+# Parameters:
+#
+#   * $1: the packet type (protocol) to be handled by bareudp,
+#   * $2: a flag to activate or deactivate bareudp's "multiproto" mode.
+#
+test_overlay()
+{
+	local ETHERTYPE="$1"; readonly ETHERTYPE
+	local MULTIPROTO="$2"; readonly MULTIPROTO
+	local IPV4
+	local IPV6
+	local MPLS_UC
+
+	case "${ETHERTYPE}" in
+		"ipv4")
+			IPV4="ipv4"
+			if [ "${MULTIPROTO}" = "multiproto" ]; then
+				IPV6="ipv6"
+			else
+				IPV6=""
+			fi
+			MPLS_UC=""
+			;;
+		"ipv6")
+			IPV6="ipv6"
+			IPV4=""
+			MPLS_UC=""
+			;;
+		"mpls_uc")
+			MPLS_UC="mpls_uc"
+			IPV4=""
+			IPV6=""
+			;;
+		*)
+			exit 1
+			;;
+	esac
+	readonly IPV4
+	readonly IPV6
+	readonly MPLS_UC
+
+	# Create the bareudp devices in the intermediate namespaces
+	ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+	ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+
+	# IPv4 over UDPv4
+	if [ $IPV4 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.103/32                                   \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.100/32                                   \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv4
+	if [ $IPV6 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::103/128                                \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::100/128                                \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv4
+	if [ $MPLS_UC ]; then
+		ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1
+		ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1
+
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc      \
+			flower mpls_label 203                                          \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc      \
+			flower mpls_label 200                                          \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv4 underlay
+	ping_test "UDPv4"
+
+	# Cleanup bareudp encapsulation instructions, as they were specific to
+	# the IPv4 underlay, before setting up and testing the IPv6 underlay
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+
+	# IPv4 over UDPv6
+	if [ $IPV4 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.103/32                                       \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.100/32                                       \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv6
+	if [ $IPV6 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::103/128                                    \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::100/128                                    \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv6
+	if [ $MPLS_UC ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc          \
+			flower mpls_label 203                                              \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc          \
+			flower mpls_label 200                                              \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv6 underlay
+	ping_test "UDPv6"
+
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+	ip -netns "${NS1}" link delete bareudp_ns1
+	ip -netns "${NS2}" link delete bareudp_ns2
+}
+
+check_features()
+{
+	ip link help 2>&1 | grep -q bareudp
+	if [ $? -ne 0 ]; then
+		echo "Missing bareudp support in iproute2" >&2
+		exit_cleanup
+	fi
+
+	# Use ping6 on systems where ping doesn't handle IPv6
+	ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6"
+}
+
+usage()
+{
+	echo "Usage: $0 [-p]"
+	exit 1
+}
+
+while getopts :p o
+do
+	case $o in
+		p) PAUSE_ON_FAIL="yes";;
+		*) usage;;
+	esac
+done
+
+check_features
+
+# Create namespaces before setting up the exit trap.
+# Otherwise, exit_cleanup_all() could delete namespaces that were not created
+# by this script.
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_underlay
+setup_overlay_ipv4
+setup_overlay_ipv6
+setup_overlay_mpls
+
+test_overlay ipv4 nomultiproto
+test_overlay ipv6 nomultiproto
+test_overlay ipv4 multiproto
+test_overlay mpls_uc nomultiproto
+
+if [ "${ERR}" -eq 1 ]; then
+	echo "Some tests failed." >&2
+else
+	ERR=0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4d5df8e1eee7..614d5477365a 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y
 CONFIG_NET_DROP_MONITOR=m
 CONFIG_NETDEVSIM=m
 CONFIG_NET_FOU=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_BAREUDP=m
-- 
cgit v1.2.3


From e1eb075ccf3766860b7aa3f104ca29dcb8a46ed0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Sep 2020 14:33:38 -0700
Subject: rcutorture: Make preemptible TRACE02 enable lockdep

Currently, the CONFIG_PREEMPT_NONE=y rcutorture TRACE01 rcutorture
scenario enables lockdep.  This limits its ability to find bugs due to
non-preemptible sections of code being RCU readers, and pretty much all
code thus appearing to lockdep to be an RCU reader.  This commit therefore
moves lockdep testing to the CONFIG_PREEMPT=y rcutorture TRACE02 scenario.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 6 +++---
 tools/testing/selftests/rcutorture/configs/rcu/TRACE02 | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 12e7661b86f5..34c8ff5a12f2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
 CONFIG_TASKS_TRACE_RCU_READ_MB=y
 CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index b69ed6673c41..77541eeb4e9f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_PROVE_LOCKING=n
-#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
-- 
cgit v1.2.3


From 08c7974293851da6a64989b5ce7a0750e58178b1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 28 Aug 2020 06:46:03 -0700
Subject: torture: Don't kill gdb sessions

The rcutorture scripting will do a "kill -9" on any guest OS that exceeds
its --duration by more than a few minutes, which is very valuable when
bugs result in hangs.  However, this is a problem when the "hang" was due
to a --gdb debugging session.

This commit therefore refrains from killing the guest OS when a debugging
session is in progress.  This means that the user must manually kill the
kvm.sh process group if a hang really does occur.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 6dc2b49b85ea..d04966ab88cc 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -206,7 +206,10 @@ do
 	kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
 	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
 	then
-		if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE"
+		if test -n "$TORTURE_KCONFIG_GDB_ARG"
+		then
+			:
+		elif test $kruntime -ge $seconds || test -f "$TORTURE_STOPFILE"
 		then
 			break;
 		fi
-- 
cgit v1.2.3


From 716e343f014e2b25320f332677363e884684b742 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Wei=C3=9F?= <michael.weiss@aisec.fraunhofer.de>
Date: Tue, 27 Oct 2020 21:42:58 +0100
Subject: selftests/timens: added selftest for /proc/stat btime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test that btime value of /proc/stat is as expected in the time namespace
using a simple parser to get btime from /proc/stat.

Signed-off-by: Michael Weiß <michael.weiss@aisec.fraunhofer.de>
Reviewed-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20201027204258.7869-4-michael.weiss@aisec.fraunhofer.de
---
 tools/testing/selftests/timens/procfs.c | 58 ++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 7f14f0fdac84..f2519154208a 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime)
 	return 0;
 }
 
+static int read_proc_stat_btime(unsigned long long *boottime_sec)
+{
+	FILE *proc;
+	char line_buf[2048];
+
+	proc = fopen("/proc/stat", "r");
+	if (proc == NULL) {
+		pr_perror("Unable to open /proc/stat");
+		return -1;
+	}
+
+	while (fgets(line_buf, 2048, proc)) {
+		if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
+			continue;
+		fclose(proc);
+		return 0;
+	}
+	if (errno) {
+		pr_perror("fscanf");
+		fclose(proc);
+		return -errno;
+	}
+	pr_err("failed to parse /proc/stat");
+	fclose(proc);
+	return -1;
+}
+
 static int check_uptime(void)
 {
 	struct timespec uptime_new, uptime_old;
@@ -123,18 +150,47 @@ static int check_uptime(void)
 	return 0;
 }
 
+static int check_stat_btime(void)
+{
+	unsigned long long btime_new, btime_old;
+	unsigned long long btime_expected;
+
+	if (switch_ns(parent_ns))
+		return pr_err("switch_ns(%d)", parent_ns);
+
+	if (read_proc_stat_btime(&btime_old))
+		return 1;
+
+	if (switch_ns(child_ns))
+		return pr_err("switch_ns(%d)", child_ns);
+
+	if (read_proc_stat_btime(&btime_new))
+		return 1;
+
+	btime_expected = btime_old - TEN_DAYS_IN_SEC;
+	if (btime_new != btime_expected) {
+		pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
+			btime_old, btime_new, btime_expected);
+		return 1;
+	}
+
+	ksft_test_result_pass("Passed for /proc/stat btime\n");
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	int ret = 0;
 
 	nscheck();
 
-	ksft_set_plan(1);
+	ksft_set_plan(2);
 
 	if (init_namespaces())
 		return 1;
 
 	ret |= check_uptime();
+	ret |= check_stat_btime();
 
 	if (ret)
 		ksft_exit_fail();
-- 
cgit v1.2.3


From aaf376bddf68d0afe5f4b5f25fc555da358e2287 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:34:48 -0800
Subject: selftests/bpf: Move test_tcppbf_user into test_progs

Recently a bug was missed due to the fact that test_tcpbpf_user is not a
part of test_progs. In order to prevent similar issues in the future move
the test functionality into test_progs. By doing this we can make certain
that it is a part of standard testing and will not be overlooked.

As a part of moving the functionality into test_progs it is necessary to
integrate with the test_progs framework and to drop any redundant code.
This patch:
1. Cleans up the include headers
2. Dropped a duplicate definition of bpf_find_map
3. Switched over to using test_progs specific cgroup functions
4. Renamed main to test_tcpbpf_user
5. Dropped return value in favor of CHECK_FAIL to check for errors

The general idea is that I wanted to keep the changes as small as possible
while moving the file into the test_progs framework. The follow-on patches
are meant to clean up the remaining issues such as the use of CHECK_FAIL.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/160443928881.1086697.17661359319919165370.stgit@localhost.localdomain
---
 tools/testing/selftests/bpf/.gitignore             |   1 -
 tools/testing/selftests/bpf/Makefile               |   3 +-
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 141 ++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf_user.c     | 165 ---------------------
 4 files changed, 142 insertions(+), 168 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
 delete mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3ab1200e172f..395ae040ce1f 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -8,7 +8,6 @@ FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
 /test_progs*
-test_tcpbpf_user
 test_verifier_log
 feature
 test_sock
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 542768f5195b..50e5b18fc455 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_verifier_log test_dev_cgroup \
 	test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sysctl \
@@ -163,7 +163,6 @@ $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
new file mode 100644
index 000000000000..caa8d3adec8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <test_progs.h>
+
+#include "test_tcpbpf.h"
+
+#define CG_NAME "/tcpbpf-user-test"
+
+/* 3 comes from one listening socket + both ends of the connection */
+#define EXPECTED_CLOSE_EVENTS		3
+
+#define EXPECT_EQ(expected, actual, fmt)			\
+	do {							\
+		if ((expected) != (actual)) {			\
+			printf("  Value of: " #actual "\n"	\
+			       "    Actual: %" fmt "\n"		\
+			       "  Expected: %" fmt "\n",	\
+			       (actual), (expected));		\
+			ret--;					\
+		}						\
+	} while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+	__u32 expected_events;
+	int ret = 0;
+
+	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+			   (1 << BPF_SOCK_OPS_RWND_INIT) |
+			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+			   (1 << BPF_SOCK_OPS_STATE_CB) |
+			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+	EXPECT_EQ(501ULL, result->bytes_received, "llu");
+	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+	EXPECT_EQ(1, result->data_segs_in, PRIu32);
+	EXPECT_EQ(1, result->data_segs_out, PRIu32);
+	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+	EXPECT_EQ(1, result->num_listen, PRIu32);
+	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
+
+	return ret;
+}
+
+int verify_sockopt_result(int sock_map_fd)
+{
+	__u32 key = 0;
+	int ret = 0;
+	int res;
+	int rv;
+
+	/* check setsockopt for SAVE_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(0, res, "d");
+	key = 1;
+	/* check getsockopt for SAVED_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(1, res, "d");
+	return ret;
+}
+
+void test_tcpbpf_user(void)
+{
+	const char *file = "test_tcpbpf_kern.o";
+	int prog_fd, map_fd, sock_map_fd;
+	struct tcpbpf_globals g = {0};
+	int error = EXIT_FAILURE;
+	struct bpf_object *obj;
+	int cg_fd = -1;
+	int retry = 10;
+	__u32 key = 0;
+	int rv;
+
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (cg_fd < 0)
+		goto err;
+
+	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+		printf("FAILED: load_bpf_file failed for: %s\n", file);
+		goto err;
+	}
+
+	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		goto err;
+	}
+
+	if (system("./tcp_server.py")) {
+		printf("FAILED: TCP server\n");
+		goto err;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "global_map");
+	if (map_fd < 0)
+		goto err;
+
+	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
+	if (sock_map_fd < 0)
+		goto err;
+
+retry_lookup:
+	rv = bpf_map_lookup_elem(map_fd, &key, &g);
+	if (rv != 0) {
+		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+		goto err;
+	}
+
+	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
+		printf("Unexpected number of close events (%d), retrying!\n",
+		       g.num_close_events);
+		usleep(100);
+		goto retry_lookup;
+	}
+
+	if (verify_result(&g)) {
+		printf("FAILED: Wrong stats\n");
+		goto err;
+	}
+
+	if (verify_sockopt_result(sock_map_fd)) {
+		printf("FAILED: Wrong sockopt stats\n");
+		goto err;
+	}
+
+	error = 0;
+err:
+	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	if (cg_fd != -1)
+		close(cg_fd);
+
+	CHECK_FAIL(error);
+}
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
deleted file mode 100644
index 74a9e49988b6..000000000000
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <sys/types.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-#include "cgroup_helpers.h"
-
-#include "test_tcpbpf.h"
-
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
-
-#define EXPECT_EQ(expected, actual, fmt)			\
-	do {							\
-		if ((expected) != (actual)) {			\
-			printf("  Value of: " #actual "\n"	\
-			       "    Actual: %" fmt "\n"		\
-			       "  Expected: %" fmt "\n",	\
-			       (actual), (expected));		\
-			ret--;					\
-		}						\
-	} while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
-{
-	__u32 expected_events;
-	int ret = 0;
-
-	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
-			   (1 << BPF_SOCK_OPS_RWND_INIT) |
-			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
-			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
-			   (1 << BPF_SOCK_OPS_STATE_CB) |
-			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
-	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
-	EXPECT_EQ(501ULL, result->bytes_received, "llu");
-	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
-	EXPECT_EQ(1, result->data_segs_in, PRIu32);
-	EXPECT_EQ(1, result->data_segs_out, PRIu32);
-	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
-	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
-	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
-
-	return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
-	__u32 key = 0;
-	int ret = 0;
-	int res;
-	int rv;
-
-	/* check setsockopt for SAVE_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(0, res, "d");
-	key = 1;
-	/* check getsockopt for SAVED_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(1, res, "d");
-	return ret;
-}
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-			const char *name)
-{
-	struct bpf_map *map;
-
-	map = bpf_object__find_map_by_name(obj, name);
-	if (!map) {
-		printf("%s:FAIL:map '%s' not found\n", test, name);
-		return -1;
-	}
-	return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-	const char *file = "test_tcpbpf_kern.o";
-	int prog_fd, map_fd, sock_map_fd;
-	struct tcpbpf_globals g = {0};
-	const char *cg_path = "/foo";
-	int error = EXIT_FAILURE;
-	struct bpf_object *obj;
-	int cg_fd = -1;
-	int retry = 10;
-	__u32 key = 0;
-	int rv;
-
-	cg_fd = cgroup_setup_and_join(cg_path);
-	if (cg_fd < 0)
-		goto err;
-
-	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
-		printf("FAILED: load_bpf_file failed for: %s\n", file);
-		goto err;
-	}
-
-	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (rv) {
-		printf("FAILED: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
-		goto err;
-	}
-
-	if (system("./tcp_server.py")) {
-		printf("FAILED: TCP server\n");
-		goto err;
-	}
-
-	map_fd = bpf_find_map(__func__, obj, "global_map");
-	if (map_fd < 0)
-		goto err;
-
-	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
-	if (sock_map_fd < 0)
-		goto err;
-
-retry_lookup:
-	rv = bpf_map_lookup_elem(map_fd, &key, &g);
-	if (rv != 0) {
-		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
-		goto err;
-	}
-
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		printf("Unexpected number of close events (%d), retrying!\n",
-		       g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
-	if (verify_result(&g)) {
-		printf("FAILED: Wrong stats\n");
-		goto err;
-	}
-
-	if (verify_sockopt_result(sock_map_fd)) {
-		printf("FAILED: Wrong sockopt stats\n");
-		goto err;
-	}
-
-	printf("PASSED!\n");
-	error = 0;
-err:
-	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
-	close(cg_fd);
-	cleanup_cgroup_environment();
-	return error;
-}
-- 
cgit v1.2.3


From 247f0ec361b7e0c5c67db8222873182fb8be5146 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:34:56 -0800
Subject: selftests/bpf: Drop python client/server in favor of threads

Drop the tcp_client/server.py files in favor of using a client and server
thread within the test case. Specifically we spawn a new thread to play the
role of the server, and the main testing thread plays the role of client.

Add logic to the end of the run_test function to guarantee that the sockets
are closed when we begin verifying results.

Doing this we are able to reduce overhead since we don't have two python
workers possibly floating around. In addition we don't have to worry about
synchronization issues and as such the retry loop waiting for the threads
to close the sockets can be dropped as we will have already closed the
sockets in the local executable and synchronized the server thread.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/160443929638.1086697.2430242340980315521.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 95 ++++++++++++++++++----
 tools/testing/selftests/bpf/tcp_client.py          | 50 ------------
 tools/testing/selftests/bpf/tcp_server.py          | 80 ------------------
 3 files changed, 78 insertions(+), 147 deletions(-)
 delete mode 100755 tools/testing/selftests/bpf/tcp_client.py
 delete mode 100755 tools/testing/selftests/bpf/tcp_server.py

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index caa8d3adec8a..616269abdb41 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -1,13 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <inttypes.h>
 #include <test_progs.h>
+#include <network_helpers.h>
 
 #include "test_tcpbpf.h"
 
+#define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
 
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
+static __u32 duration;
 
 #define EXPECT_EQ(expected, actual, fmt)			\
 	do {							\
@@ -42,7 +43,9 @@ int verify_result(const struct tcpbpf_globals *result)
 	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
 	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
 	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
+
+	/* 3 comes from one listening socket + both ends of the connection */
+	EXPECT_EQ(3, result->num_close_events, PRIu32);
 
 	return ret;
 }
@@ -66,6 +69,75 @@ int verify_sockopt_result(int sock_map_fd)
 	return ret;
 }
 
+static int run_test(void)
+{
+	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
+	char buf[1000];
+	int err = -1;
+	int i, rv;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n",
+		  listen_fd, errno))
+		goto done;
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)",
+		  "cli_fd:%d errno:%d\n", cli_fd, errno))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (CHECK(accept_fd == -1, "accept(listen_fd)",
+		  "accept_fd:%d errno:%d\n", accept_fd, errno))
+		goto done;
+
+	/* Send 1000B of '+'s from cli_fd -> accept_fd */
+	for (i = 0; i < 1000; i++)
+		buf[i] = '+';
+
+	rv = send(cli_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(accept_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/* Send 500B of '.'s from accept_fd ->cli_fd */
+	for (i = 0; i < 500; i++)
+		buf[i] = '.';
+
+	rv = send(accept_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(cli_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/*
+	 * shutdown accept first to guarantee correct ordering for
+	 * bytes_received and bytes_acked when we go to verify the results.
+	 */
+	shutdown(accept_fd, SHUT_WR);
+	err = recv(cli_fd, buf, 1, 0);
+	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno))
+		goto done;
+
+	shutdown(cli_fd, SHUT_WR);
+	err = recv(accept_fd, buf, 1, 0);
+	CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno);
+done:
+	if (accept_fd != -1)
+		close(accept_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+
+	return err;
+}
+
 void test_tcpbpf_user(void)
 {
 	const char *file = "test_tcpbpf_kern.o";
@@ -74,7 +146,6 @@ void test_tcpbpf_user(void)
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
 	int cg_fd = -1;
-	int retry = 10;
 	__u32 key = 0;
 	int rv;
 
@@ -94,11 +165,6 @@ void test_tcpbpf_user(void)
 		goto err;
 	}
 
-	if (system("./tcp_server.py")) {
-		printf("FAILED: TCP server\n");
-		goto err;
-	}
-
 	map_fd = bpf_find_map(__func__, obj, "global_map");
 	if (map_fd < 0)
 		goto err;
@@ -107,20 +173,15 @@ void test_tcpbpf_user(void)
 	if (sock_map_fd < 0)
 		goto err;
 
-retry_lookup:
+	if (run_test())
+		goto err;
+
 	rv = bpf_map_lookup_elem(map_fd, &key, &g);
 	if (rv != 0) {
 		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
 		goto err;
 	}
 
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		printf("Unexpected number of close events (%d), retrying!\n",
-		       g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
 	if (verify_result(&g)) {
 		printf("FAILED: Wrong stats\n");
 		goto err;
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
deleted file mode 100755
index bfff82be3fc1..000000000000
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-serverPort = int(sys.argv[1])
-
-# create active socket
-sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try:
-    sock.connect(('::1', serverPort))
-except socket.error as e:
-    sys.exit(1)
-
-buf = b''
-n = 0
-while n < 1000:
-    buf += b'+'
-    n += 1
-
-sock.settimeout(1);
-n = send(sock, buf)
-n = read(sock, 500)
-sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
deleted file mode 100755
index 42ab8882f00f..000000000000
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-SERVER_PORT = 12877
-MAX_PORTS = 2
-
-serverPort = SERVER_PORT
-serverSocket = None
-
-# create passive socket
-serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-
-try: serverSocket.bind(('::1', 0))
-except socket.error as msg:
-    print('bind fails: ' + str(msg))
-
-sn = serverSocket.getsockname()
-serverPort = sn[1]
-
-cmdStr = ("./tcp_client.py %d &") % (serverPort)
-os.system(cmdStr)
-
-buf = b''
-n = 0
-while n < 500:
-    buf += b'.'
-    n += 1
-
-serverSocket.listen(MAX_PORTS)
-readList = [serverSocket]
-
-while True:
-    readyRead, readyWrite, inError = \
-        select.select(readList, [], [], 2)
-
-    if len(readyRead) > 0:
-        waitCount = 0
-        for sock in readyRead:
-            if sock == serverSocket:
-                (clientSocket, address) = serverSocket.accept()
-                address = str(address[0])
-                readList.append(clientSocket)
-            else:
-                sock.settimeout(1);
-                s = read(sock, 1000)
-                n = send(sock, buf)
-                sock.close()
-                serverSocket.close()
-                sys.exit(0)
-    else:
-        print('Select timeout!')
-        sys.exit(1)
-- 
cgit v1.2.3


From d3813ea14b696053c076123239093822b527f0f7 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:04 -0800
Subject: selftests/bpf: Replace EXPECT_EQ with ASSERT_EQ and refactor
 verify_results

There is already logic in test_progs.h for asserting that a value is
expected to be another value. So instead of reinventing it we should just
make use of ASSERT_EQ in tcpbpf_user.c. This will allow for better
debugging and integrates much more closely with the test_progs framework.

In addition we can refactor the code a bit to merge together the two
verify functions and tie them together into a single function. Doing this
helps to clean the code up a bit and makes it more readable as all the
verification is now done in one function.

Lastly we can relocate the verification to the end of the run_test since it
is logically part of the test itself. With this we can drop the need for a
return value from run_test since verification becomes the last step of the
call and then immediately following is the tear down of the test setup.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/160443930408.1086697.16101205859962113000.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 115 ++++++++-------------
 1 file changed, 43 insertions(+), 72 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 616269abdb41..22c359871af6 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
 #include <test_progs.h>
 #include <network_helpers.h>
 
@@ -10,66 +9,56 @@
 
 static __u32 duration;
 
-#define EXPECT_EQ(expected, actual, fmt)			\
-	do {							\
-		if ((expected) != (actual)) {			\
-			printf("  Value of: " #actual "\n"	\
-			       "    Actual: %" fmt "\n"		\
-			       "  Expected: %" fmt "\n",	\
-			       (actual), (expected));		\
-			ret--;					\
-		}						\
-	} while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
+static void verify_result(int map_fd, int sock_map_fd)
 {
-	__u32 expected_events;
-	int ret = 0;
-
-	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
-			   (1 << BPF_SOCK_OPS_RWND_INIT) |
-			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
-			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
-			   (1 << BPF_SOCK_OPS_STATE_CB) |
-			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
-	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
-	EXPECT_EQ(501ULL, result->bytes_received, "llu");
-	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
-	EXPECT_EQ(1, result->data_segs_in, PRIu32);
-	EXPECT_EQ(1, result->data_segs_out, PRIu32);
-	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
-	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
-	EXPECT_EQ(1, result->num_listen, PRIu32);
+	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+				 (1 << BPF_SOCK_OPS_RWND_INIT) |
+				 (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+				 (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+				 (1 << BPF_SOCK_OPS_STATE_CB) |
+				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+	struct tcpbpf_globals result;
+	__u32 key = 0;
+	int res, rv;
+
+	rv = bpf_map_lookup_elem(map_fd, &key, &result);
+	if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d",
+		  rv, errno))
+		return;
+
+	/* check global map */
+	CHECK(expected_events != result.event_map, "event_map",
+	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
+	      result.event_map, expected_events);
+
+	ASSERT_EQ(result.bytes_received, 501, "bytes_received");
+	ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
+	ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
+	ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+	ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
+	ASSERT_EQ(result.num_listen, 1, "num_listen");
 
 	/* 3 comes from one listening socket + both ends of the connection */
-	EXPECT_EQ(3, result->num_close_events, PRIu32);
-
-	return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
-	__u32 key = 0;
-	int ret = 0;
-	int res;
-	int rv;
+	ASSERT_EQ(result.num_close_events, 3, "num_close_events");
 
 	/* check setsockopt for SAVE_SYN */
 	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(0, res, "d");
-	key = 1;
+	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
+	      rv, errno);
+	ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)");
+
 	/* check getsockopt for SAVED_SYN */
+	key = 1;
 	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(1, res, "d");
-	return ret;
+	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
+	      rv, errno);
+	ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)");
 }
 
-static int run_test(void)
+static void run_test(int map_fd, int sock_map_fd)
 {
 	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
 	char buf[1000];
@@ -135,18 +124,17 @@ done:
 	if (listen_fd != -1)
 		close(listen_fd);
 
-	return err;
+	if (!err)
+		verify_result(map_fd, sock_map_fd);
 }
 
 void test_tcpbpf_user(void)
 {
 	const char *file = "test_tcpbpf_kern.o";
 	int prog_fd, map_fd, sock_map_fd;
-	struct tcpbpf_globals g = {0};
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
 	int cg_fd = -1;
-	__u32 key = 0;
 	int rv;
 
 	cg_fd = test__join_cgroup(CG_NAME);
@@ -173,24 +161,7 @@ void test_tcpbpf_user(void)
 	if (sock_map_fd < 0)
 		goto err;
 
-	if (run_test())
-		goto err;
-
-	rv = bpf_map_lookup_elem(map_fd, &key, &g);
-	if (rv != 0) {
-		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
-		goto err;
-	}
-
-	if (verify_result(&g)) {
-		printf("FAILED: Wrong stats\n");
-		goto err;
-	}
-
-	if (verify_sockopt_result(sock_map_fd)) {
-		printf("FAILED: Wrong sockopt stats\n");
-		goto err;
-	}
+	run_test(map_fd, sock_map_fd);
 
 	error = 0;
 err:
-- 
cgit v1.2.3


From 0a099d1429c709020277d24a460d11ff8356a080 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:11 -0800
Subject: selftests/bpf: Migrate tcpbpf_user.c to use BPF skeleton

Update tcpbpf_user.c to make use of the BPF skeleton. Doing this we can
simplify test_tcpbpf_user and reduce the overhead involved in setting up
the test.

In addition we can clean up the remaining bits such as the one remaining
CHECK_FAIL at the end of test_tcpbpf_user so that the function only makes
use of CHECK as needed.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/160443931155.1086697.17869006617113525162.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 41 ++++++++--------------
 1 file changed, 14 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 22c359871af6..bef81648797a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -3,6 +3,7 @@
 #include <network_helpers.h>
 
 #include "test_tcpbpf.h"
+#include "test_tcpbpf_kern.skel.h"
 
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
@@ -130,44 +131,30 @@ done:
 
 void test_tcpbpf_user(void)
 {
-	const char *file = "test_tcpbpf_kern.o";
-	int prog_fd, map_fd, sock_map_fd;
-	int error = EXIT_FAILURE;
-	struct bpf_object *obj;
+	struct test_tcpbpf_kern *skel;
+	int map_fd, sock_map_fd;
 	int cg_fd = -1;
-	int rv;
 
-	cg_fd = test__join_cgroup(CG_NAME);
-	if (cg_fd < 0)
-		goto err;
-
-	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
-		printf("FAILED: load_bpf_file failed for: %s\n", file);
-		goto err;
-	}
+	skel = test_tcpbpf_kern__open_and_load();
+	if (CHECK(!skel, "open and load skel", "failed"))
+		return;
 
-	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (rv) {
-		printf("FAILED: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")",
+		  "cg_fd:%d errno:%d", cg_fd, errno))
 		goto err;
-	}
 
-	map_fd = bpf_find_map(__func__, obj, "global_map");
-	if (map_fd < 0)
-		goto err;
+	map_fd = bpf_map__fd(skel->maps.global_map);
+	sock_map_fd = bpf_map__fd(skel->maps.sockopt_results);
 
-	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
-	if (sock_map_fd < 0)
+	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
 		goto err;
 
 	run_test(map_fd, sock_map_fd);
 
-	error = 0;
 err:
-	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
 	if (cg_fd != -1)
 		close(cg_fd);
-
-	CHECK_FAIL(error);
+	test_tcpbpf_kern__destroy(skel);
 }
-- 
cgit v1.2.3


From 21b5177e997c98643eaabd4b917f2e287395af86 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:19 -0800
Subject: selftest/bpf: Use global variables instead of maps for
 test_tcpbpf_kern

Use global variables instead of global_map and sockopt_results_map to track
test data. Doing this greatly simplifies the code as there is not need to
take the extra steps of updating the maps or looking up elements.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/160443931900.1086697.6588858453575682351.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 51 ++++---------
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 86 ++++------------------
 tools/testing/selftests/bpf/test_tcpbpf.h          |  2 +
 3 files changed, 31 insertions(+), 108 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index bef81648797a..ab5281475f44 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -10,7 +10,7 @@
 
 static __u32 duration;
 
-static void verify_result(int map_fd, int sock_map_fd)
+static void verify_result(struct tcpbpf_globals *result)
 {
 	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
 				 (1 << BPF_SOCK_OPS_RWND_INIT) |
@@ -20,46 +20,31 @@ static void verify_result(int map_fd, int sock_map_fd)
 				 (1 << BPF_SOCK_OPS_NEEDS_ECN) |
 				 (1 << BPF_SOCK_OPS_STATE_CB) |
 				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-	struct tcpbpf_globals result;
-	__u32 key = 0;
-	int res, rv;
-
-	rv = bpf_map_lookup_elem(map_fd, &key, &result);
-	if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d",
-		  rv, errno))
-		return;
 
 	/* check global map */
-	CHECK(expected_events != result.event_map, "event_map",
+	CHECK(expected_events != result->event_map, "event_map",
 	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
-	      result.event_map, expected_events);
+	      result->event_map, expected_events);
 
-	ASSERT_EQ(result.bytes_received, 501, "bytes_received");
-	ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
-	ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
-	ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
-	ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
-	ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
-	ASSERT_EQ(result.num_listen, 1, "num_listen");
+	ASSERT_EQ(result->bytes_received, 501, "bytes_received");
+	ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result->data_segs_in, 1, "data_segs_in");
+	ASSERT_EQ(result->data_segs_out, 1, "data_segs_out");
+	ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+	ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv");
+	ASSERT_EQ(result->num_listen, 1, "num_listen");
 
 	/* 3 comes from one listening socket + both ends of the connection */
-	ASSERT_EQ(result.num_close_events, 3, "num_close_events");
+	ASSERT_EQ(result->num_close_events, 3, "num_close_events");
 
 	/* check setsockopt for SAVE_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
-	      rv, errno);
-	ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)");
+	ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn");
 
 	/* check getsockopt for SAVED_SYN */
-	key = 1;
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
-	      rv, errno);
-	ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)");
+	ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
 }
 
-static void run_test(int map_fd, int sock_map_fd)
+static void run_test(struct tcpbpf_globals *result)
 {
 	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
 	char buf[1000];
@@ -126,13 +111,12 @@ done:
 		close(listen_fd);
 
 	if (!err)
-		verify_result(map_fd, sock_map_fd);
+		verify_result(result);
 }
 
 void test_tcpbpf_user(void)
 {
 	struct test_tcpbpf_kern *skel;
-	int map_fd, sock_map_fd;
 	int cg_fd = -1;
 
 	skel = test_tcpbpf_kern__open_and_load();
@@ -144,14 +128,11 @@ void test_tcpbpf_user(void)
 		  "cg_fd:%d errno:%d", cg_fd, errno))
 		goto err;
 
-	map_fd = bpf_map__fd(skel->maps.global_map);
-	sock_map_fd = bpf_map__fd(skel->maps.sockopt_results);
-
 	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
 	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
 		goto err;
 
-	run_test(map_fd, sock_map_fd);
+	run_test(&skel->bss->global);
 
 err:
 	if (cg_fd != -1)
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 3e6912e4df3d..e85e49deba70 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -14,40 +14,7 @@
 #include <bpf/bpf_endian.h>
 #include "test_tcpbpf.h"
 
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4);
-	__type(key, __u32);
-	__type(value, struct tcpbpf_globals);
-} global_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 2);
-	__type(key, __u32);
-	__type(value, int);
-} sockopt_results SEC(".maps");
-
-static inline void update_event_map(int event)
-{
-	__u32 key = 0;
-	struct tcpbpf_globals g, *gp;
-
-	gp = bpf_map_lookup_elem(&global_map, &key);
-	if (gp == NULL) {
-		struct tcpbpf_globals g = {0};
-
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-			    BPF_ANY);
-	} else {
-		g = *gp;
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-			    BPF_ANY);
-	}
-}
-
+struct tcpbpf_globals global = {};
 int _version SEC("version") = 1;
 
 SEC("sockops")
@@ -105,29 +72,15 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 
 	op = (int) skops->op;
 
-	update_event_map(op);
+	global.event_map |= (1 << op);
 
 	switch (op) {
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 		/* Test failure to set largest cb flag (assumes not defined) */
-		bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+		global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
 		/* Set callback */
-		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+		global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops,
 						 BPF_SOCK_OPS_STATE_CB_FLAG);
-		/* Update results */
-		{
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
-			g.bad_cb_test_rv = bad_call_rv;
-			g.good_cb_test_rv = good_call_rv;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
-		}
 		break;
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 		skops->sk_txhash = 0x12345f;
@@ -143,10 +96,8 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 
 				thdr = (struct tcphdr *)(header + offset);
 				v = thdr->syn;
-				__u32 key = 1;
 
-				bpf_map_update_elem(&sockopt_results, &key, &v,
-						    BPF_ANY);
+				global.tcp_saved_syn = v;
 			}
 		}
 		break;
@@ -156,25 +107,16 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		break;
 	case BPF_SOCK_OPS_STATE_CB:
 		if (skops->args[1] == BPF_TCP_CLOSE) {
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
 			if (skops->args[0] == BPF_TCP_LISTEN) {
-				g.num_listen++;
+				global.num_listen++;
 			} else {
-				g.total_retrans = skops->total_retrans;
-				g.data_segs_in = skops->data_segs_in;
-				g.data_segs_out = skops->data_segs_out;
-				g.bytes_received = skops->bytes_received;
-				g.bytes_acked = skops->bytes_acked;
+				global.total_retrans = skops->total_retrans;
+				global.data_segs_in = skops->data_segs_in;
+				global.data_segs_out = skops->data_segs_out;
+				global.bytes_received = skops->bytes_received;
+				global.bytes_acked = skops->bytes_acked;
 			}
-			g.num_close_events++;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
+			global.num_close_events++;
 		}
 		break;
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
@@ -182,9 +124,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
 				   &save_syn, sizeof(save_syn));
 		/* Update global map w/ result of setsock opt */
-		__u32 key = 0;
-
-		bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+		global.tcp_save_syn = v;
 		break;
 	default:
 		rv = -1;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 6220b95cbd02..0ed33521cbbb 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -14,5 +14,7 @@ struct tcpbpf_globals {
 	__u64 bytes_acked;
 	__u32 num_listen;
 	__u32 num_close_events;
+	__u32 tcp_save_syn;
+	__u32 tcp_saved_syn;
 };
 #endif
-- 
cgit v1.2.3


From 537e48259eacbd92f3463900c20cc3acd9dd2072 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:57 +0200
Subject: selftests: net: bridge: factor out mcast_packet_test

Factor out mcast_packet_test into lib.sh so it can be later extended and
reused by MLDv2 tests.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 32 ----------------------
 tools/testing/selftests/net/forwarding/lib.sh      | 32 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 0e71abdd7a03..50a48ce16ba1 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -105,38 +105,6 @@ cleanup()
 	vrf_cleanup
 }
 
-# return 0 if the packet wasn't seen on host2_if or 1 if it was
-mcast_packet_test()
-{
-	local mac=$1
-	local src_ip=$2
-	local ip=$3
-	local host1_if=$4
-	local host2_if=$5
-	local seen=0
-
-	# Add an ACL on `host2_if` which will tell us whether the packet
-	# was received by it or not.
-	tc qdisc add dev $host2_if ingress
-	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
-		flower ip_proto udp dst_mac $mac action drop
-
-	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
-	sleep 1
-
-	tc -j -s filter show dev $host2_if ingress \
-		| jq -e ".[] | select(.options.handle == 101) \
-		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
-	if [[ $? -eq 0 ]]; then
-		seen=1
-	fi
-
-	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
-	tc qdisc del dev $host2_if ingress
-
-	return $seen
-}
-
 v2reportleave_test()
 {
 	RET=0
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 927f9ba49e08..bb3ccc6d2165 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1270,3 +1270,35 @@ tcpdump_show()
 {
 	tcpdump -e -n -r $capfile 2>&1
 }
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+	local mac=$1
+	local src_ip=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
+	local seen=0
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was received by it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+		flower ip_proto udp dst_mac $mac action drop
+
+	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -eq 0 ]]; then
+		seen=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $seen
+}
-- 
cgit v1.2.3


From 450b0b84c6609e7ec1fb0276c8a7e4efa9e78a4c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:58 +0200
Subject: selftests: net: lib: add support for IPv6 mcast packet test

In order to test an IPv6 multicast packet we need to pass different tc
and mausezahn protocols only, so add a simple check for the destination
address which decides if we should generate an IPv4 or IPv6 mcast
packet.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/lib.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index bb3ccc6d2165..0a427b8a039d 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1280,14 +1280,22 @@ mcast_packet_test()
 	local host1_if=$4
 	local host2_if=$5
 	local seen=0
+	local tc_proto="ip"
+	local mz_v6arg=""
+
+	# basic check to see if we were passed an IPv4 address, if not assume IPv6
+	if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+		tc_proto="ipv6"
+		mz_v6arg="-6"
+	fi
 
 	# Add an ACL on `host2_if` which will tell us whether the packet
 	# was received by it or not.
 	tc qdisc add dev $host2_if ingress
-	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
 		flower ip_proto udp dst_mac $mac action drop
 
-	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
 
 	tc -j -s filter show dev $host2_if ingress \
@@ -1297,7 +1305,7 @@ mcast_packet_test()
 		seen=1
 	fi
 
-	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
 	tc qdisc del dev $host2_if ingress
 
 	return $seen
-- 
cgit v1.2.3


From 95e6f430ebfee51ac174e234388e7c6e8216ff2c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:59 +0200
Subject: selftests: net: bridge: factor out and rename sg state functions

Factor out S,G entry state checking functions for existence, forwarding,
blocking and timer to lib.sh so they can be later used by MLDv2 tests.
Add brmcast_ suffix to their name to make the relation to the bridge
explicit.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 179 +++++++--------------
 tools/testing/selftests/net/forwarding/lib.sh      |  67 ++++++++
 2 files changed, 123 insertions(+), 123 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 50a48ce16ba1..675eff45b037 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -137,73 +137,6 @@ v2reportleave_test()
 	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
-check_sg_entries()
-{
-	local report=$1; shift
-	local slist=("$@")
-	local sarg=""
-
-	for src in "${slist[@]}"; do
-		sarg="${sarg} and .source_list[].address == \"$src\""
-	done
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
-	check_err $? "Wrong *,G entry source list after $report report"
-
-	for sgent in "${slist[@]}"; do
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
-		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
-	done
-}
-
-check_sg_fwding()
-{
-	local should_fwd=$1; shift
-	local sources=("$@")
-
-	for src in "${sources[@]}"; do
-		local retval=0
-
-		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
-		retval=$?
-		if [ $should_fwd -eq 1 ]; then
-			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
-		else
-			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
-		fi
-	done
-}
-
-check_sg_state()
-{
-	local is_blocked=$1; shift
-	local sources=("$@")
-	local should_fail=1
-
-	if [ $is_blocked -eq 1 ]; then
-		should_fail=0
-	fi
-
-	for src in "${sources[@]}"; do
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
-				 .source_list[] |
-				 select(.address == \"$src\") |
-				 select(.timer == \"0.00\")" &>/dev/null
-		check_err_fail $should_fail $? "Entry $src has zero timer"
-
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
-				 .flags[] == \"blocked\")" &>/dev/null
-		check_err_fail $should_fail $? "Entry $src has blocked flag"
-	done
-}
-
 v3include_prepare()
 {
 	local host1_if=$1
@@ -225,7 +158,7 @@ v3include_prepare()
 			 select(.grp == \"$TEST_GROUP\" and \
 				.source_list != null and .filter_mode == \"include\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
-	check_sg_entries "is_include" "${X[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
 }
 
 v3exclude_prepare()
@@ -247,10 +180,10 @@ v3exclude_prepare()
 				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
 
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -276,10 +209,10 @@ v3include_test()
 
 	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP is_include"
 
@@ -295,12 +228,12 @@ v3inc_allow_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
 	sleep 1
-	check_sg_entries "allow" "${X[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> allow"
 
@@ -316,12 +249,12 @@ v3inc_is_include_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
 	sleep 1
-	check_sg_entries "is_include" "${X[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> is_include"
 
@@ -334,8 +267,8 @@ v3inc_is_exclude_test()
 
 	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
 
@@ -361,10 +294,10 @@ v3inc_to_exclude_test()
 				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
 
-	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -379,8 +312,8 @@ v3inc_to_exclude_test()
 				.source_list[].address == \"192.0.2.21\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
 
@@ -399,13 +332,13 @@ v3exc_allow_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
 	sleep 1
-	check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
 
@@ -422,13 +355,13 @@ v3exc_is_include_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
 	sleep 1
-	check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
 
@@ -445,13 +378,13 @@ v3exc_is_exclude_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
 	sleep 1
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
 
@@ -471,13 +404,13 @@ v3exc_to_exclude_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
 	sleep 1
-	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
 
@@ -496,9 +429,9 @@ v3inc_block_test()
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
 	# make sure the lowered timers have expired (by default 2 seconds)
 	sleep 3
-	check_sg_entries "block" "${X[@]}"
+	brmcast_check_sg_entries "block" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -507,8 +440,8 @@ v3inc_block_test()
 				.source_list[].address == \"192.0.2.1\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> block"
 
@@ -528,13 +461,13 @@ v3exc_block_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
 	sleep 1
-	check_sg_entries "block" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> block"
 
@@ -574,12 +507,12 @@ v3exc_timeout_test()
 				.source_list[].address == \"192.0.2.2\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
 
-	check_sg_entries "allow" "${X[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 192.0.2.100
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 192.0.2.100
 
 	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
 
@@ -610,7 +543,7 @@ v3star_ex_auto_add_test()
 				.flags[] == \"added_by_star_ex\")" &>/dev/null
 	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
 
-	check_sg_fwding 1 192.0.2.3
+	brmcast_check_sg_fwding 1 192.0.2.3
 
 	log_test "IGMPv3 S,G port entry automatic add to a *,G port"
 
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 0a427b8a039d..98ea37d26c44 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1310,3 +1310,70 @@ mcast_packet_test()
 
 	return $seen
 }
+
+brmcast_check_sg_entries()
+{
+	local report=$1; shift
+	local slist=("$@")
+	local sarg=""
+
+	for src in "${slist[@]}"; do
+		sarg="${sarg} and .source_list[].address == \"$src\""
+	done
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+	check_err $? "Wrong *,G entry source list after $report report"
+
+	for sgent in "${slist[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+	done
+}
+
+brmcast_check_sg_fwding()
+{
+	local should_fwd=$1; shift
+	local sources=("$@")
+
+	for src in "${sources[@]}"; do
+		local retval=0
+
+		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+		retval=$?
+		if [ $should_fwd -eq 1 ]; then
+			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+		else
+			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+		fi
+	done
+}
+
+brmcast_check_sg_state()
+{
+	local is_blocked=$1; shift
+	local sources=("$@")
+	local should_fail=1
+
+	if [ $is_blocked -eq 1 ]; then
+		should_fail=0
+	fi
+
+	for src in "${sources[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+				 .source_list[] |
+				 select(.address == \"$src\") |
+				 select(.timer == \"0.00\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has zero timer"
+
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+				 .flags[] == \"blocked\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has blocked flag"
+	done
+}
-- 
cgit v1.2.3


From 444c897111b02b06599b92e597436e09bd969501 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:00 +0200
Subject: selftests: net: bridge: add initial MLDv2 include test

Add the initial setup for MLDv2 tests with the first test of a simple
is_include report. For MLDv2 we need to setup the bridge properly and we
also send the full precooked packets instead of relying on mausezahn to
fill in some parts. For verification we use the generic S,G state checking
functions from lib.sh.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 146 +++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mld.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
new file mode 100755
index 000000000000..3d0d579e4e03
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="mldv2include_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="ff02::cc"
+TEST_GROUP_MAC="33:33:00:00:00:cc"
+
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3
+MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\
+00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
+00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
+00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \
+					mcast_mld_version 2 mcast_startup_query_interval 300 \
+					mcast_querier 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	# make sure a query has been generated
+	sleep 5
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+mldv2include_prepare()
+{
+	local host1_if=$1
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	ip link set dev br0 type bridge mcast_mld_version 2
+	check_err $? "Could not change bridge MLD version to 2"
+
+	$MZ $host1_if $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+mldv2cleanup()
+{
+	local port=$1
+
+	bridge mdb del dev br0 port $port grp $TEST_GROUP
+	ip link set dev br0 type bridge mcast_mld_version 1
+}
+
+mldv2include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP is_include"
+
+	mldv2cleanup $swp1
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 0ef10e60682ec2604e34ff8e6eff8fb39fee176c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:01 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> allow report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     ALLOW (B)      INCLUDE (A+B)        (B)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 3d0d579e4e03..accc4ec2dcce 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
+00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
+02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
+00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
+00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
 
 source lib.sh
 
@@ -136,6 +142,27 @@ mldv2include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> allow"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From f44de2bc684da9d310c7703a077bd992ebdf71b1 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:02 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> is_include report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)       IS_IN (B)     INCLUDE (A+B)       (B)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index accc4ec2dcce..a93bf6fa6caa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\
+05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
+00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
+00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
 # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
@@ -163,6 +169,27 @@ mldv2inc_allow_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_include"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From f9fcd55328a934a252b89b6cdde6c888a62207a7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:03 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> is_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)       IS_EX (B)     EXCLUDE (A*B, B-A)  (B-A)=0
                                                       Delete (A-B)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 54 +++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index a93bf6fa6caa..ddef8699be7d 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -25,6 +25,12 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f
 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
 
 source lib.sh
 
@@ -123,6 +129,38 @@ mldv2include_prepare()
 	brmcast_check_sg_entries "is_include" "${X[@]}"
 }
 
+mldv2exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("2001:db8:1::1" "2001:db8:1::2")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2include_prepare $h1
+
+	$MZ $host1_if -c 1 $MZPKT_IS_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+			 .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists"
+}
+
 mldv2cleanup()
 {
 	local port=$1
@@ -190,6 +228,20 @@ mldv2inc_is_include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_is_exclude_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 55852f1d6a337e63c38e9c247fffaeabb5faef16 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:04 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> to_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     TO_EX (B)      EXCLUDE (A*B,B-A)    (B-A)=0
                                                       Delete (A-B)
                                                       Send Q(MA,A*B)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 56 +++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index ddef8699be7d..571b01ef672c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
+	   mldv2inc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -31,6 +32,12 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:
 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 
 source lib.sh
 
@@ -242,6 +249,53 @@ mldv2inc_is_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1")
+	local Y=("2001:db8:1::20" "2001:db8:1::30")
+
+	mldv2include_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 0e77581fdf302547f71749e2e1cd657562189375 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:05 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> allow report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   ALLOW (A)      EXCLUDE (X+A,Y-A)    (A)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 571b01ef672c..97882c13f278 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -26,6 +26,11 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f
 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30
+MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
 MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
@@ -296,6 +301,29 @@ mldv2inc_to_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> allow"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 25ba7c03ef1ab77639b00cd2932e0de3b402bed7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:06 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> is_include report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)     IS_IN (A)     EXCLUDE (X+A, Y-A)  (A)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 97882c13f278..bae865b5bc8c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -20,6 +20,11 @@ MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01
 05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
 00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
 00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
@@ -324,6 +329,29 @@ mldv2exc_allow_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC3 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_include"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From d0b19dedd6c26a797455acb2f198fe946793f209 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:07 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> is_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)     IS_EX (A)     EXCLUDE (A-Y, Y*A)  (A-X-Y)=MALI
                                                       Delete (X-A)
                                                       Delete (Y-A)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index bae865b5bc8c..0f48c8da041b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
+	   mldv2exc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -42,6 +43,11 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:
 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
 MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
@@ -352,6 +358,29 @@ mldv2exc_is_include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_is_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_EXC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 9eb4394db91c5ef9595872974224a38719781829 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:08 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> to_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   TO_EX (A)      EXCLUDE (A-Y,Y*A)    (A-X-Y) =
                                                            Filter Timer
                                                       Delete (X-A)
                                                       Delete (Y-A)
                                                       Send Q(MA,A-Y)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 0f48c8da041b..024fa22fa3c2 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -381,6 +381,34 @@ mldv2exc_is_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 57386215cc0b4ed483a3ebcb0d2a378ab0db14ba Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:09 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> block report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     BLOCK (B)      INCLUDE (A)          Send Q(MA,A*B)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 37 +++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 024fa22fa3c2..a3c405b2fd6f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -54,6 +54,12 @@ MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 
 source lib.sh
 
@@ -409,6 +415,35 @@ mldv2exc_to_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	brmcast_check_sg_entries "block" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 report $TEST_GROUP include -> block"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From a2d667f0c1fb30e24ae71f6b8e832808bf18f117 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:10 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> block report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   BLOCK (A)      EXCLUDE (X+(A-Y),Y)  (A-X-Y) =
                                                            Filter Timer
                                                       Send Q(MA,A-Y)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index a3c405b2fd6f..c498e51b8d2b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
+	   mldv2exc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -444,6 +445,34 @@ mldv2inc_block_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	sleep 1
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From d598cc6a2d45321a2a662742f8c38b43021e36e0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:11 +0200
Subject: selftests: net: bridge: add test for mldv2 exclude timeout

Test that when a group in exclude mode expires it changes mode to
include and the blocked entries are deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 48 +++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index c498e51b8d2b..b34cf4c6ceba 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
 	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test"
+	   mldv2exc_block_test mldv2exc_timeout_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -473,6 +473,52 @@ mldv2exc_block_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_timeout_test()
+{
+	RET=0
+	local X=("2001:db8:1::20" "2001:db8:1::30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+	mldv2exclude_prepare $h1
+	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 252b353c5bb30ee9cc0c3d5cef128cec372e6a2c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:12 +0200
Subject: selftests: net: bridge: add test for mldv2 *,g auto-add

When we have *,G ports in exclude mode and a new S,G,port is added
the kernel has to automatically create an S,G entry for each exclude
port to get proper forwarding.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index b34cf4c6ceba..ffdcfa87ca2b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
 	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test mldv2exc_timeout_test"
+	   mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -519,6 +519,35 @@ mldv2exc_timeout_test()
 	mldv2cleanup $swp1
 }
 
+mldv2star_ex_auto_add_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h2 -c 1 $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	brmcast_check_sg_fwding 1 2001:db8:1::3
+
+	log_test "MLDv2 S,G port entry automatic add to a *,G port"
+
+	mldv2cleanup $swp1
+	mldv2cleanup $swp2
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 8d014eaa9254a9b8e0841df40dd36782b451579a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 3 Nov 2020 11:05:09 -0800
Subject: selftests: mptcp: add ADD_ADDR timeout test case

This patch added the test case for retransmitting ADD_ADDR when timeout
occurs. It set NS1's add_addr_timeout to 1 second, and drop NS2's ADD_ADDR
echo packets.

Here we need to slow down the transfer process of all data to let the
ADD_ADDR suboptions can be retransmitted three times. So we added a new
parameter "speed" for do_transfer, it can be set with fast or slow.

We also added three new optional parameters for run_tests, and dropped
run_remove_tests function.

Since we added the netfilter rules in this test case, we need to update
the "config" file.

Suggested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/config        | 10 +++
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 94 ++++++++++++++++++-------
 2 files changed, 80 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 741a1c4f4ae8..0faaccd21447 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m
 CONFIG_INET_MPTCP_DIAG=m
 CONFIG_VETH=y
 CONFIG_NET_SCH_NETEM=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 08f53d86dedc..0d93b243695f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -13,6 +13,24 @@ capture=0
 
 TEST_COUNT=0
 
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
+#				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
+CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 3 64,
+			       48 0 0 54,
+			       84 0 0 240,
+			       21 6 7 48,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 4 96,
+			       48 0 0 74,
+			       84 0 0 240,
+			       21 0 1 48,
+			       6 0 0 65535,
+			       6 0 0 0"
+
 init()
 {
 	capout=$(mktemp)
@@ -82,6 +100,26 @@ reset_with_cookies()
 	done
 }
 
+reset_with_add_addr_timeout()
+{
+	local ip="${1:-4}"
+	local tables
+
+	tables="iptables"
+	if [ $ip -eq 6 ]; then
+		tables="ip6tables"
+	fi
+
+	reset
+
+	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+	ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+		-m tcp --tcp-option 30 \
+		-m bpf --bytecode \
+		"$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+		-j DROP
+}
+
 for arg in "$@"; do
 	if [ "$arg" = "-c" ]; then
 		capture=1
@@ -94,6 +132,17 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+iptables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without iptables tool"
+	exit $ksft_skip
+fi
+
+ip6tables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without ip6tables tool"
+	exit $ksft_skip
+fi
 
 check_transfer()
 {
@@ -135,6 +184,7 @@ do_transfer()
 	connect_addr="$5"
 	rm_nr_ns1="$6"
 	rm_nr_ns2="$7"
+	speed="$8"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -159,7 +209,7 @@ do_transfer()
 		sleep 1
 	fi
 
-	if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+	if [ $speed = "fast" ]; then
 		mptcp_connect="./mptcp_connect -j"
 	else
 		mptcp_connect="./mptcp_connect -r"
@@ -250,26 +300,13 @@ run_tests()
 	listener_ns="$1"
 	connector_ns="$2"
 	connect_addr="$3"
+	rm_nr_ns1="${4:-0}"
+	rm_nr_ns2="${5:-0}"
+	speed="${6:-fast}"
 	lret=0
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
-	lret=$?
-	if [ $lret -ne 0 ]; then
-		ret=$lret
-		return
-	fi
-}
-
-run_remove_tests()
-{
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
-	rm_nr_ns1="$4"
-	rm_nr_ns2="$5"
-	lret=0
-
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+		${rm_nr_ns1} ${rm_nr_ns2} ${speed}
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
 chk_add_nr 1 1
 
+# add_addr timeout
+reset_with_add_addr_timeout
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1 0 0 slow
+chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
+chk_add_nr 4 0
+
 # single subflow, remove
 reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+run_tests $ns1 $ns2 10.0.1.1 0 1 slow
 chk_join_nr "remove single subflow" 1 1 1
 chk_rm_nr 1 1
 
@@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+run_tests $ns1 $ns2 10.0.1.1 0 2 slow
 chk_join_nr "remove multiple subflows" 2 2 2
 chk_rm_nr 2 2
 
@@ -515,7 +561,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+run_tests $ns1 $ns2 10.0.1.1 1 0 slow
 chk_join_nr "remove single address" 1 1 1
 chk_add_nr 1 1
 chk_rm_nr 0 0
@@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+run_tests $ns1 $ns2 10.0.1.1 1 1 slow
 chk_join_nr "remove subflow and signal" 2 2 2
 chk_add_nr 1 1
 chk_rm_nr 1 1
@@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 3
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+run_tests $ns1 $ns2 10.0.1.1 1 2 slow
 chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
-- 
cgit v1.2.3


From c81ed6d81e0560713ceb94917ff1981848d0614e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:51 -0800
Subject: libbpf: Factor out common operations in BTF writing APIs

Factor out commiting of appended type data. Also extract fetching the very
last type in the BTF (to append members to). These two operations are common
across many APIs and will be easier to refactor with split BTF, if they are
extracted into a single place.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-2-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 123 ++++++++++++++++++----------------------------------
 1 file changed, 43 insertions(+), 80 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 231b07203e3d..89fecfe5cb2b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1560,6 +1560,20 @@ static void btf_type_inc_vlen(struct btf_type *t)
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
 }
 
+static int btf_commit_type(struct btf *btf, int data_sz)
+{
+	int err;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += data_sz;
+	btf->hdr->str_off += data_sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
 /*
  * Append new BTF_KIND_INT type with:
  *   - *name* - non-empty, non-NULL type name;
@@ -1572,7 +1586,7 @@ static void btf_type_inc_vlen(struct btf_type *t)
 int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -1606,14 +1620,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 	/* set INT info, we don't allow setting legacy bit offset/size */
 	*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /* it's completely legal to append BTF types with type IDs pointing forward to
@@ -1631,7 +1638,7 @@ static int validate_type_id(int id)
 static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
 {
 	struct btf_type *t;
-	int sz, name_off = 0, err;
+	int sz, name_off = 0;
 
 	if (validate_type_id(ref_type_id))
 		return -EINVAL;
@@ -1654,14 +1661,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
 	t->info = btf_type_info(kind, 0, 0);
 	t->type = ref_type_id;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1689,7 +1689,7 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 {
 	struct btf_type *t;
 	struct btf_array *a;
-	int sz, err;
+	int sz;
 
 	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
 		return -EINVAL;
@@ -1711,21 +1711,14 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 	a->index_type = index_type_id;
 	a->nelems = nr_elems;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /* generic STRUCT/UNION append function */
 static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;
 
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
@@ -1748,14 +1741,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
 	t->info = btf_type_info(kind, 0, 0);
 	t->size = bytes_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1793,6 +1779,11 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
 	return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
 }
 
+static struct btf_type *btf_last_type(struct btf *btf)
+{
+	return btf_type_by_id(btf, btf__get_nr_types(btf));
+}
+
 /*
  * Append new field for the current STRUCT/UNION type with:
  *   - *name* - name of the field, can be NULL or empty for anonymous field;
@@ -1814,7 +1805,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	/* last type should be union/struct */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_composite(t))
 		return -EINVAL;
 
@@ -1849,7 +1840,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	m->offset = bit_offset | (bit_size << 24);
 
 	/* btf_add_type_mem can invalidate t pointer */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	/* update parent type's vlen and kflag */
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
 
@@ -1874,7 +1865,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;
 
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
@@ -1899,14 +1890,7 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 	t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
 	t->size = byte_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1926,7 +1910,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	/* last type should be BTF_KIND_ENUM */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_enum(t))
 		return -EINVAL;
 
@@ -1953,7 +1937,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	v->val = value;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
@@ -2093,7 +2077,7 @@ int btf__add_func(struct btf *btf, const char *name,
 int btf__add_func_proto(struct btf *btf, int ret_type_id)
 {
 	struct btf_type *t;
-	int sz, err;
+	int sz;
 
 	if (validate_type_id(ret_type_id))
 		return -EINVAL;
@@ -2113,14 +2097,7 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
 	t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
 	t->type = ret_type_id;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2143,7 +2120,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	/* last type should be BTF_KIND_FUNC_PROTO */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_func_proto(t))
 		return -EINVAL;
 
@@ -2166,7 +2143,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	p->type = type_id;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
@@ -2188,7 +2165,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 {
 	struct btf_type *t;
 	struct btf_var *v;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -2219,14 +2196,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 	v = btf_var(t);
 	v->linkage = linkage;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2244,7 +2214,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -2267,14 +2237,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 	t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
 	t->size = byte_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2296,7 +2259,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	/* last type should be BTF_KIND_DATASEC */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_datasec(t))
 		return -EINVAL;
 
@@ -2317,7 +2280,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	v->size = byte_sz;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
-- 
cgit v1.2.3


From d9448f94962bd28554df7d9a342d37c7f13d6232 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:52 -0800
Subject: selftest/bpf: Relax btf_dedup test checks

Remove the requirement of a strictly exact string section contents. This used
to be true when string deduplication was done through sorting, but with string
dedup done through hash table, it's no longer true. So relax test harness to
relax strings checks and, consequently, type checks, which now don't have to
have exactly the same string offsets.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 40 +++++++++++++++++-----------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 93162484c2ca..8ae97e2a4b9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -6652,7 +6652,7 @@ static void do_test_dedup(unsigned int test_num)
 	const void *test_btf_data, *expect_btf_data;
 	const char *ret_test_next_str, *ret_expect_next_str;
 	const char *test_strs, *expect_strs;
-	const char *test_str_cur, *test_str_end;
+	const char *test_str_cur;
 	const char *expect_str_cur, *expect_str_end;
 	unsigned int raw_btf_size;
 	void *raw_btf;
@@ -6719,12 +6719,18 @@ static void do_test_dedup(unsigned int test_num)
 		goto done;
 	}
 
-	test_str_cur = test_strs;
-	test_str_end = test_strs + test_hdr->str_len;
 	expect_str_cur = expect_strs;
 	expect_str_end = expect_strs + expect_hdr->str_len;
-	while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
+	while (expect_str_cur < expect_str_end) {
 		size_t test_len, expect_len;
+		int off;
+
+		off = btf__find_str(test_btf, expect_str_cur);
+		if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) {
+			err = -1;
+			goto done;
+		}
+		test_str_cur = btf__str_by_offset(test_btf, off);
 
 		test_len = strlen(test_str_cur);
 		expect_len = strlen(expect_str_cur);
@@ -6741,15 +6747,8 @@ static void do_test_dedup(unsigned int test_num)
 			err = -1;
 			goto done;
 		}
-		test_str_cur += test_len + 1;
 		expect_str_cur += expect_len + 1;
 	}
-	if (CHECK(test_str_cur != test_str_end,
-		  "test_str_cur:%p != test_str_end:%p",
-		  test_str_cur, test_str_end)) {
-		err = -1;
-		goto done;
-	}
 
 	test_nr_types = btf__get_nr_types(test_btf);
 	expect_nr_types = btf__get_nr_types(expect_btf);
@@ -6775,10 +6774,21 @@ static void do_test_dedup(unsigned int test_num)
 			err = -1;
 			goto done;
 		}
-		if (CHECK(memcmp((void *)test_type,
-				 (void *)expect_type,
-				 test_size),
-			  "type #%d: contents differ", i)) {
+		if (CHECK(btf_kind(test_type) != btf_kind(expect_type),
+			  "type %d kind: exp %d != got %u\n",
+			  i, btf_kind(expect_type), btf_kind(test_type))) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(test_type->info != expect_type->info,
+			  "type %d info: exp %d != got %u\n",
+			  i, expect_type->info, test_type->info)) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(test_type->size != expect_type->size,
+			  "type %d size/type: exp %d != got %u\n",
+			  i, expect_type->size, test_type->size)) {
 			err = -1;
 			goto done;
 		}
-- 
cgit v1.2.3


From 88a82c2a9ab5b2ce533c3de3f4517853c2f67f53 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 4 Nov 2020 20:33:53 -0800
Subject: libbpf: Unify and speed up BTF string deduplication

Revamp BTF dedup's string deduplication to match the approach of writable BTF
string management. This allows to transfer deduplicated strings index back to
BTF object after deduplication without expensive extra memory copying and hash
map re-construction. It also simplifies the code and speeds it up, because
hashmap-based string deduplication is faster than sort + unique approach.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-4-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 263 ++++++++++++++++++++--------------------------------
 1 file changed, 98 insertions(+), 165 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 89fecfe5cb2b..fa1b147c63c6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -90,6 +90,14 @@ struct btf {
 	struct hashmap *strs_hash;
 	/* whether strings are already deduplicated */
 	bool strs_deduped;
+	/* extra indirection layer to make strings hashmap work with stable
+	 * string offsets and ability to transparently choose between
+	 * btf->strs_data or btf_dedup->strs_data as a source of strings.
+	 * This is used for BTF strings dedup to transfer deduplicated strings
+	 * data back to struct btf without re-building strings index.
+	 */
+	void **strs_data_ptr;
+
 	/* BTF object FD, if loaded into kernel */
 	int fd;
 
@@ -1363,17 +1371,19 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
 static size_t strs_hash_fn(const void *key, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str = btf->strs_data + (long)key;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str = strs + (long)key;
 
 	return str_hash(str);
 }
 
 static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str1 = btf->strs_data + (long)key1;
-	const char *str2 = btf->strs_data + (long)key2;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str1 = strs + (long)key1;
+	const char *str2 = strs + (long)key2;
 
 	return strcmp(str1, str2) == 0;
 }
@@ -1418,6 +1428,9 @@ static int btf_ensure_modifiable(struct btf *btf)
 	memcpy(types, btf->types_data, btf->hdr->type_len);
 	memcpy(strs, btf->strs_data, btf->hdr->str_len);
 
+	/* make hashmap below use btf->strs_data as a source of strings */
+	btf->strs_data_ptr = &btf->strs_data;
+
 	/* build lookup index for all strings */
 	hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
 	if (IS_ERR(hash)) {
@@ -2824,19 +2837,11 @@ struct btf_dedup {
 	size_t hypot_cap;
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
-};
-
-struct btf_str_ptr {
-	const char *str;
-	__u32 new_off;
-	bool used;
-};
-
-struct btf_str_ptrs {
-	struct btf_str_ptr *ptrs;
-	const char *data;
-	__u32 cnt;
-	__u32 cap;
+	/* temporary strings deduplication state */
+	void *strs_data;
+	size_t strs_cap;
+	size_t strs_len;
+	struct hashmap* strs_hash;
 };
 
 static long hash_combine(long h, long value)
@@ -3063,64 +3068,41 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 	return 0;
 }
 
-static int str_sort_by_content(const void *a1, const void *a2)
-{
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
-
-	return strcmp(p1->str, p2->str);
-}
-
-static int str_sort_by_offset(const void *a1, const void *a2)
-{
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
-
-	if (p1->str != p2->str)
-		return p1->str < p2->str ? -1 : 1;
-	return 0;
-}
-
-static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
-{
-	const struct btf_str_ptr *p = pelem;
-
-	if (str_ptr != p->str)
-		return (const char *)str_ptr < p->str ? -1 : 1;
-	return 0;
-}
-
-static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
+	struct btf_dedup *d = ctx;
+	long old_off, new_off, len;
+	const char *s;
+	void *p;
+	int err;
 
 	if (*str_off_ptr == 0)
 		return 0;
 
-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	s->used = true;
-	return 0;
-}
+	s = btf__str_by_offset(d->btf, *str_off_ptr);
+	len = strlen(s) + 1;
 
-static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
-{
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
+	new_off = d->strs_len;
+	p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
+	if (!p)
+		return -ENOMEM;
 
-	if (*str_off_ptr == 0)
-		return 0;
+	memcpy(p, s, len);
 
-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	*str_off_ptr = s->new_off;
+	/* Now attempt to add the string, but only if the string with the same
+	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
+	 * string exists, we'll get its offset in old_off (that's old_key).
+	 */
+	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
+			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	if (err == -EEXIST) {
+		*str_off_ptr = old_off;
+	} else if (err) {
+		return err;
+	} else {
+		*str_off_ptr = new_off;
+		d->strs_len += len;
+	}
 	return 0;
 }
 
@@ -3137,118 +3119,69 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
  */
 static int btf_dedup_strings(struct btf_dedup *d)
 {
-	char *start = d->btf->strs_data;
-	char *end = start + d->btf->hdr->str_len;
-	char *p = start, *tmp_strs = NULL;
-	struct btf_str_ptrs strs = {
-		.cnt = 0,
-		.cap = 0,
-		.ptrs = NULL,
-		.data = start,
-	};
-	int i, j, err = 0, grp_idx;
-	bool grp_used;
+	char *s;
+	int err;
 
 	if (d->btf->strs_deduped)
 		return 0;
 
-	/* build index of all strings */
-	while (p < end) {
-		if (strs.cnt + 1 > strs.cap) {
-			struct btf_str_ptr *new_ptrs;
-
-			strs.cap += max(strs.cnt / 2, 16U);
-			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
-			if (!new_ptrs) {
-				err = -ENOMEM;
-				goto done;
-			}
-			strs.ptrs = new_ptrs;
-		}
-
-		strs.ptrs[strs.cnt].str = p;
-		strs.ptrs[strs.cnt].used = false;
-
-		p += strlen(p) + 1;
-		strs.cnt++;
-	}
-
-	/* temporary storage for deduplicated strings */
-	tmp_strs = malloc(d->btf->hdr->str_len);
-	if (!tmp_strs) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* mark all used strings */
-	strs.ptrs[0].used = true;
-	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
-	if (err)
-		goto done;
-
-	/* sort strings by context, so that we can identify duplicates */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+	s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+	if (!s)
+		return -ENOMEM;
+	/* initial empty string */
+	s[0] = 0;
+	d->strs_len = 1;
 
-	/*
-	 * iterate groups of equal strings and if any instance in a group was
-	 * referenced, emit single instance and remember new offset
+	/* temporarily switch to use btf_dedup's strs_data for strings for hash
+	 * functions; later we'll just transfer hashmap to struct btf as is,
+	 * along the strs_data
 	 */
-	p = tmp_strs;
-	grp_idx = 0;
-	grp_used = strs.ptrs[0].used;
-	/* iterate past end to avoid code duplication after loop */
-	for (i = 1; i <= strs.cnt; i++) {
-		/*
-		 * when i == strs.cnt, we want to skip string comparison and go
-		 * straight to handling last group of strings (otherwise we'd
-		 * need to handle last group after the loop w/ duplicated code)
-		 */
-		if (i < strs.cnt &&
-		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
-			grp_used = grp_used || strs.ptrs[i].used;
-			continue;
-		}
+	d->btf->strs_data_ptr = &d->strs_data;
 
-		/*
-		 * this check would have been required after the loop to handle
-		 * last group of strings, but due to <= condition in a loop
-		 * we avoid that duplication
-		 */
-		if (grp_used) {
-			int new_off = p - tmp_strs;
-			__u32 len = strlen(strs.ptrs[grp_idx].str);
-
-			memmove(p, strs.ptrs[grp_idx].str, len + 1);
-			for (j = grp_idx; j < i; j++)
-				strs.ptrs[j].new_off = new_off;
-			p += len + 1;
-		}
-
-		if (i < strs.cnt) {
-			grp_idx = i;
-			grp_used = strs.ptrs[i].used;
-		}
+	d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
+	if (IS_ERR(d->strs_hash)) {
+		err = PTR_ERR(d->strs_hash);
+		d->strs_hash = NULL;
+		goto err_out;
 	}
 
-	/* replace original strings with deduped ones */
-	d->btf->hdr->str_len = p - tmp_strs;
-	memmove(start, tmp_strs, d->btf->hdr->str_len);
-	end = start + d->btf->hdr->str_len;
-
-	/* restore original order for further binary search lookups */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+	/* insert empty string; we won't be looking it up during strings
+	 * dedup, but it's good to have it for generic BTF string lookups
+	 */
+	err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
+			      HASHMAP_ADD, NULL, NULL);
+	if (err)
+		goto err_out;
 
 	/* remap string offsets */
-	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
 	if (err)
-		goto done;
+		goto err_out;
 
-	d->btf->hdr->str_len = end - start;
+	/* replace BTF string data and hash with deduped ones */
+	free(d->btf->strs_data);
+	hashmap__free(d->btf->strs_hash);
+	d->btf->strs_data = d->strs_data;
+	d->btf->strs_data_cap = d->strs_cap;
+	d->btf->hdr->str_len = d->strs_len;
+	d->btf->strs_hash = d->strs_hash;
+	/* now point strs_data_ptr back to btf->strs_data */
+	d->btf->strs_data_ptr = &d->btf->strs_data;
+
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
 	d->btf->strs_deduped = true;
+	return 0;
+
+err_out:
+	free(d->strs_data);
+	hashmap__free(d->strs_hash);
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
+
+	/* restore strings pointer for existing d->btf->strs_hash back */
+	d->btf->strs_data_ptr = &d->strs_data;
 
-done:
-	free(tmp_strs);
-	free(strs.ptrs);
 	return err;
 }
 
-- 
cgit v1.2.3


From ba451366bf44498f22dd16c31a792083bd6f2ae1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:54 -0800
Subject: libbpf: Implement basic split BTF support

Support split BTF operation, in which one BTF (base BTF) provides basic set of
types and strings, while another one (split BTF) builds on top of base's types
and strings and adds its own new types and strings. From API standpoint, the
fact that the split BTF is built on top of the base BTF is transparent.

Type numeration is transparent. If the base BTF had last type ID #N, then all
types in the split BTF start at type ID N+1. Any type in split BTF can
reference base BTF types, but not vice versa. Programmatically construction of
a split BTF on top of a base BTF is supported: one can create an empty split
BTF with btf__new_empty_split() and pass base BTF as an input, or pass raw
binary data to btf__new_split(), or use btf__parse_xxx_split() variants to get
initial set of split types/strings from the ELF file with .BTF section.

String offsets are similarly transparent and are a logical continuation of
base BTF's strings. When building BTF programmatically and adding a new string
(explicitly with btf__add_str() or implicitly through appending new
types/members), string-to-be-added would first be looked up from the base
BTF's string section and re-used if it's there. If not, it will be looked up
and/or added to the split BTF string section. Similarly to type IDs, types in
split BTF can refer to strings from base BTF absolutely transparently (but not
vice versa, of course, because base BTF doesn't "know" about existence of
split BTF).

Internal type index is slightly adjusted to be zero-indexed, ignoring a fake
[0] VOID type. This allows to handle split/base BTF type lookups transparently
by using btf->start_id type ID offset, which is always 1 for base/non-split
BTF and equals btf__get_nr_types(base_btf) + 1 for the split BTF.

BTF deduplication is not yet supported for split BTF and support for it will
be added in separate patch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-5-andrii@kernel.org
---
 tools/lib/bpf/btf.c      | 197 ++++++++++++++++++++++++++++++++++++-----------
 tools/lib/bpf/btf.h      |   8 ++
 tools/lib/bpf/libbpf.map |   9 +++
 3 files changed, 169 insertions(+), 45 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index fa1b147c63c6..0258cf108c0a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -78,10 +78,32 @@ struct btf {
 	void *types_data;
 	size_t types_data_cap; /* used size stored in hdr->type_len */
 
-	/* type ID to `struct btf_type *` lookup index */
+	/* type ID to `struct btf_type *` lookup index
+	 * type_offs[0] corresponds to the first non-VOID type:
+	 *   - for base BTF it's type [1];
+	 *   - for split BTF it's the first non-base BTF type.
+	 */
 	__u32 *type_offs;
 	size_t type_offs_cap;
+	/* number of types in this BTF instance:
+	 *   - doesn't include special [0] void type;
+	 *   - for split BTF counts number of types added on top of base BTF.
+	 */
 	__u32 nr_types;
+	/* if not NULL, points to the base BTF on top of which the current
+	 * split BTF is based
+	 */
+	struct btf *base_btf;
+	/* BTF type ID of the first type in this BTF instance:
+	 *   - for base BTF it's equal to 1;
+	 *   - for split BTF it's equal to biggest type ID of base BTF plus 1.
+	 */
+	int start_id;
+	/* logical string offset of this BTF instance:
+	 *   - for base BTF it's equal to 0;
+	 *   - for split BTF it's equal to total size of base BTF's string section size.
+	 */
+	int start_str_off;
 
 	void *strs_data;
 	size_t strs_data_cap; /* used size stored in hdr->str_len */
@@ -176,7 +198,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
 	__u32 *p;
 
 	p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
-			btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+			btf->nr_types, BTF_MAX_NR_TYPES, 1);
 	if (!p)
 		return -ENOMEM;
 
@@ -252,12 +274,16 @@ static int btf_parse_str_sec(struct btf *btf)
 	const char *start = btf->strs_data;
 	const char *end = start + btf->hdr->str_len;
 
-	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
-	    start[0] || end[-1]) {
+	if (btf->base_btf && hdr->str_len == 0)
+		return 0;
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
+		pr_debug("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+	if (!btf->base_btf && start[0]) {
 		pr_debug("Invalid BTF string section\n");
 		return -EINVAL;
 	}
-
 	return 0;
 }
 
@@ -372,19 +398,9 @@ static int btf_parse_type_sec(struct btf *btf)
 	struct btf_header *hdr = btf->hdr;
 	void *next_type = btf->types_data;
 	void *end_type = next_type + hdr->type_len;
-	int err, i = 0, type_size;
-
-	/* VOID (type_id == 0) is specially handled by btf__get_type_by_id(),
-	 * so ensure we can never properly use its offset from index by
-	 * setting it to a large value
-	 */
-	err = btf_add_type_idx_entry(btf, UINT_MAX);
-	if (err)
-		return err;
+	int err, type_size;
 
 	while (next_type + sizeof(struct btf_type) <= end_type) {
-		i++;
-
 		if (btf->swapped_endian)
 			btf_bswap_type_base(next_type);
 
@@ -392,7 +408,7 @@ static int btf_parse_type_sec(struct btf *btf)
 		if (type_size < 0)
 			return type_size;
 		if (next_type + type_size > end_type) {
-			pr_warn("BTF type [%d] is malformed\n", i);
+			pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types);
 			return -EINVAL;
 		}
 
@@ -417,7 +433,7 @@ static int btf_parse_type_sec(struct btf *btf)
 
 __u32 btf__get_nr_types(const struct btf *btf)
 {
-	return btf->nr_types;
+	return btf->start_id + btf->nr_types - 1;
 }
 
 /* internal helper returning non-const pointer to a type */
@@ -425,13 +441,14 @@ static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 {
 	if (type_id == 0)
 		return &btf_void;
-
-	return btf->types_data + btf->type_offs[type_id];
+	if (type_id < btf->start_id)
+		return btf_type_by_id(btf->base_btf, type_id);
+	return btf->types_data + btf->type_offs[type_id - btf->start_id];
 }
 
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
-	if (type_id > btf->nr_types)
+	if (type_id >= btf->start_id + btf->nr_types)
 		return NULL;
 	return btf_type_by_id((struct btf *)btf, type_id);
 }
@@ -440,9 +457,13 @@ static int determine_ptr_size(const struct btf *btf)
 {
 	const struct btf_type *t;
 	const char *name;
-	int i;
+	int i, n;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	if (btf->base_btf && btf->base_btf->ptr_sz > 0)
+		return btf->base_btf->ptr_sz;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
 		t = btf__type_by_id(btf, i);
 		if (!btf_is_int(t))
 			continue;
@@ -725,7 +746,7 @@ void btf__free(struct btf *btf)
 	free(btf);
 }
 
-struct btf *btf__new_empty(void)
+static struct btf *btf_new_empty(struct btf *base_btf)
 {
 	struct btf *btf;
 
@@ -733,12 +754,21 @@ struct btf *btf__new_empty(void)
 	if (!btf)
 		return ERR_PTR(-ENOMEM);
 
+	btf->nr_types = 0;
+	btf->start_id = 1;
+	btf->start_str_off = 0;
 	btf->fd = -1;
 	btf->ptr_sz = sizeof(void *);
 	btf->swapped_endian = false;
 
+	if (base_btf) {
+		btf->base_btf = base_btf;
+		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_str_off = base_btf->hdr->str_len;
+	}
+
 	/* +1 for empty string at offset 0 */
-	btf->raw_size = sizeof(struct btf_header) + 1;
+	btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
 	btf->raw_data = calloc(1, btf->raw_size);
 	if (!btf->raw_data) {
 		free(btf);
@@ -752,12 +782,22 @@ struct btf *btf__new_empty(void)
 
 	btf->types_data = btf->raw_data + btf->hdr->hdr_len;
 	btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
-	btf->hdr->str_len = 1; /* empty string at offset 0 */
+	btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
 
 	return btf;
 }
 
-struct btf *btf__new(const void *data, __u32 size)
+struct btf *btf__new_empty(void)
+{
+	return btf_new_empty(NULL);
+}
+
+struct btf *btf__new_empty_split(struct btf *base_btf)
+{
+	return btf_new_empty(base_btf);
+}
+
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
 {
 	struct btf *btf;
 	int err;
@@ -766,6 +806,16 @@ struct btf *btf__new(const void *data, __u32 size)
 	if (!btf)
 		return ERR_PTR(-ENOMEM);
 
+	btf->nr_types = 0;
+	btf->start_id = 1;
+	btf->start_str_off = 0;
+
+	if (base_btf) {
+		btf->base_btf = base_btf;
+		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_str_off = base_btf->hdr->str_len;
+	}
+
 	btf->raw_data = malloc(size);
 	if (!btf->raw_data) {
 		err = -ENOMEM;
@@ -798,7 +848,13 @@ done:
 	return btf;
 }
 
-struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__new(const void *data, __u32 size)
+{
+	return btf_new(data, size, NULL);
+}
+
+static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
+				 struct btf_ext **btf_ext)
 {
 	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
 	int err = 0, fd = -1, idx = 0;
@@ -876,7 +932,7 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 		err = -ENOENT;
 		goto done;
 	}
-	btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
 	if (IS_ERR(btf))
 		goto done;
 
@@ -921,7 +977,17 @@ done:
 	return btf;
 }
 
-struct btf *btf__parse_raw(const char *path)
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+	return btf_parse_elf(path, NULL, btf_ext);
+}
+
+struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse_elf(path, base_btf, NULL);
+}
+
+static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
 {
 	struct btf *btf = NULL;
 	void *data = NULL;
@@ -975,7 +1041,7 @@ struct btf *btf__parse_raw(const char *path)
 	}
 
 	/* finally parse BTF data */
-	btf = btf__new(data, sz);
+	btf = btf_new(data, sz, base_btf);
 
 err_out:
 	free(data);
@@ -984,18 +1050,38 @@ err_out:
 	return err ? ERR_PTR(err) : btf;
 }
 
-struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__parse_raw(const char *path)
+{
+	return btf_parse_raw(path, NULL);
+}
+
+struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse_raw(path, base_btf);
+}
+
+static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
 	struct btf *btf;
 
 	if (btf_ext)
 		*btf_ext = NULL;
 
-	btf = btf__parse_raw(path);
+	btf = btf_parse_raw(path, base_btf);
 	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
 		return btf;
 
-	return btf__parse_elf(path, btf_ext);
+	return btf_parse_elf(path, base_btf, btf_ext);
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+	return btf_parse(path, NULL, btf_ext);
+}
+
+struct btf *btf__parse_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse(path, base_btf, NULL);
 }
 
 static int compare_vsi_off(const void *_a, const void *_b)
@@ -1179,8 +1265,8 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
 
 	memcpy(p, btf->types_data, hdr->type_len);
 	if (swap_endian) {
-		for (i = 1; i <= btf->nr_types; i++) {
-			t = p  + btf->type_offs[i];
+		for (i = 0; i < btf->nr_types; i++) {
+			t = p + btf->type_offs[i];
 			/* btf_bswap_type_rest() relies on native t->info, so
 			 * we swap base type info after we swapped all the
 			 * additional information
@@ -1223,8 +1309,10 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 
 const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 {
-	if (offset < btf->hdr->str_len)
-		return btf->strs_data + offset;
+	if (offset < btf->start_str_off)
+		return btf__str_by_offset(btf->base_btf, offset);
+	else if (offset - btf->start_str_off < btf->hdr->str_len)
+		return btf->strs_data + (offset - btf->start_str_off);
 	else
 		return NULL;
 }
@@ -1461,7 +1549,10 @@ static int btf_ensure_modifiable(struct btf *btf)
 	/* if BTF was created from scratch, all strings are guaranteed to be
 	 * unique and deduplicated
 	 */
-	btf->strs_deduped = btf->hdr->str_len <= 1;
+	if (btf->hdr->str_len == 0)
+		btf->strs_deduped = true;
+	if (!btf->base_btf && btf->hdr->str_len == 1)
+		btf->strs_deduped = true;
 
 	/* invalidate raw_data representation */
 	btf_invalidate_raw_data(btf);
@@ -1493,6 +1584,14 @@ int btf__find_str(struct btf *btf, const char *s)
 	long old_off, new_off, len;
 	void *p;
 
+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}
+
 	/* BTF needs to be in a modifiable state to build string lookup index */
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
@@ -1507,7 +1606,7 @@ int btf__find_str(struct btf *btf, const char *s)
 	memcpy(p, s, len);
 
 	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
-		return old_off;
+		return btf->start_str_off + old_off;
 
 	return -ENOENT;
 }
@@ -1523,6 +1622,14 @@ int btf__add_str(struct btf *btf, const char *s)
 	void *p;
 	int err;
 
+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}
+
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
@@ -1549,12 +1656,12 @@ int btf__add_str(struct btf *btf, const char *s)
 	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
 			      HASHMAP_ADD, (const void **)&old_off, NULL);
 	if (err == -EEXIST)
-		return old_off; /* duplicated string, return existing offset */
+		return btf->start_str_off + old_off; /* duplicated string, return existing offset */
 	if (err)
 		return err;
 
 	btf->hdr->str_len += len; /* new unique string, adjust data length */
-	return new_off;
+	return btf->start_str_off + new_off;
 }
 
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
@@ -1584,7 +1691,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 	btf->hdr->type_len += data_sz;
 	btf->hdr->str_off += data_sz;
 	btf->nr_types++;
-	return btf->nr_types;
+	return btf->start_id + btf->nr_types - 1;
 }
 
 /*
@@ -4167,14 +4274,14 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
 
 		memmove(p, btf__type_by_id(d->btf, i), len);
 		d->hypot_map[i] = next_type_id;
-		d->btf->type_offs[next_type_id] = p - d->btf->types_data;
+		d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data;
 		p += len;
 		next_type_id++;
 	}
 
 	/* shrink struct btf's internal types index and update btf_header */
 	d->btf->nr_types = next_type_id - 1;
-	d->btf->type_offs_cap = d->btf->nr_types + 1;
+	d->btf->type_offs_cap = d->btf->nr_types;
 	d->btf->hdr->type_len = p - d->btf->types_data;
 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
 				       sizeof(*new_offs));
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 57247240a20a..1093f6fe6800 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -31,11 +31,19 @@ enum btf_endianness {
 };
 
 LIBBPF_API void btf__free(struct btf *btf);
+
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf);
 LIBBPF_API struct btf *btf__new_empty(void);
+LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
+LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 4ebfadf45b47..29ff4807b909 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -337,3 +337,12 @@ LIBBPF_0.2.0 {
 		perf_buffer__consume_buffer;
 		xsk_socket__create_shared;
 } LIBBPF_0.1.0;
+
+LIBBPF_0.3.0 {
+	global:
+		btf__parse_elf_split;
+		btf__parse_raw_split;
+		btf__parse_split;
+		btf__new_empty_split;
+		btf__new_split;
+} LIBBPF_0.2.0;
-- 
cgit v1.2.3


From 197389da2fbfbc3cefb229268c32d858d9575c96 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:55 -0800
Subject: selftests/bpf: Add split BTF basic test

Add selftest validating ability to programmatically generate and then dump
split BTF.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-6-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf_split.c | 99 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           | 11 +++
 2 files changed, 110 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_split.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
new file mode 100644
index 000000000000..ca7c2a91610a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+void test_btf_split() {
+	struct btf_dump_opts opts;
+	struct btf_dump *d = NULL;
+	const struct btf_type *t;
+	struct btf *btf1, *btf2;
+	int str_off, i, err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 1);				/* [2] ptr to int */
+
+	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	/* pointer size should be "inherited" from main BTF */
+	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+	str_off = btf__find_str(btf2, "int");
+	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+	t = btf__type_by_id(btf2, 1);
+	if (!ASSERT_OK_PTR(t, "int_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_int(t), true, "int_kind");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 {	*/
+	btf__add_field(btf2, "f1", 3, 0, 0);		/*      struct s1 f1;	*/
+	btf__add_field(btf2, "f2", 1, 32, 0);		/*      int f2;		*/
+	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3;	*/
+							/* } */
+
+	t = btf__type_by_id(btf1, 4);
+	ASSERT_NULL(t, "split_type_in_main");
+
+	t = btf__type_by_id(btf2, 4);
+	if (!ASSERT_OK_PTR(t, "split_struct_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind");
+	ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+
+	/* BTF-to-C dump of split BTF */
+	dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+	if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+		return;
+	opts.ctx = dump_buf_file;
+	d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
+	if (!ASSERT_OK_PTR(d, "btf_dump__new"))
+		goto cleanup;
+	for (i = 1; i <= btf__get_nr_types(btf2); i++) {
+		err = btf_dump__dump_type(d, i);
+		ASSERT_OK(err, "dump_type_ok");
+	}
+	fflush(dump_buf_file);
+	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+	ASSERT_STREQ(dump_buf,
+"struct s1 {\n"
+"	int f1;\n"
+"};\n"
+"\n"
+"struct s2 {\n"
+"	struct s1 f1;\n"
+"	int f2;\n"
+"	int *f3;\n"
+"};\n\n", "c_dump");
+
+cleanup:
+	if (dump_buf_file)
+		fclose(dump_buf_file);
+	free(dump_buf);
+	btf_dump__free(d);
+	btf__free(btf1);
+	btf__free(btf2);
+}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 238f5f61189e..d6b14853f3bc 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -141,6 +141,17 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_NEQ(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act != ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld == expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	const char *___act = actual;					\
-- 
cgit v1.2.3


From 1306c980cf892bc17e7296d3e9ab8e9082f893a1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 4 Nov 2020 20:33:56 -0800
Subject: selftests/bpf: Add checking of raw type dump in BTF writer APIs
 selftests

Add re-usable btf_helpers.{c,h} to provide BTF-related testing routines. Start
with adding a raw BTF dumping helpers.

Raw BTF dump is the most succinct and at the same time a very human-friendly
way to validate exact contents of BTF types. Cross-validate raw BTF dump and
writable BTF in a single selftest. Raw type dump checks also serve as a good
self-documentation.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-7-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |   2 +-
 tools/testing/selftests/bpf/btf_helpers.c          | 200 +++++++++++++++++++++
 tools/testing/selftests/bpf/btf_helpers.h          |  12 ++
 tools/testing/selftests/bpf/prog_tests/btf_write.c |  43 +++++
 4 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/btf_helpers.c
 create mode 100644 tools/testing/selftests/bpf/btf_helpers.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 50e5b18fc455..c1708ffa6b1c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -386,7 +386,7 @@ TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
-			 flow_dissector_load.h
+			 btf_helpers.c	flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
new file mode 100644
index 000000000000..abc3f6c04cfc
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <stdio.h>
+#include <errno.h>
+#include <bpf/btf.h>
+
+static const char * const btf_kind_str_mapping[] = {
+	[BTF_KIND_UNKN]		= "UNKNOWN",
+	[BTF_KIND_INT]		= "INT",
+	[BTF_KIND_PTR]		= "PTR",
+	[BTF_KIND_ARRAY]	= "ARRAY",
+	[BTF_KIND_STRUCT]	= "STRUCT",
+	[BTF_KIND_UNION]	= "UNION",
+	[BTF_KIND_ENUM]		= "ENUM",
+	[BTF_KIND_FWD]		= "FWD",
+	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
+	[BTF_KIND_VOLATILE]	= "VOLATILE",
+	[BTF_KIND_CONST]	= "CONST",
+	[BTF_KIND_RESTRICT]	= "RESTRICT",
+	[BTF_KIND_FUNC]		= "FUNC",
+	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
+	[BTF_KIND_VAR]		= "VAR",
+	[BTF_KIND_DATASEC]	= "DATASEC",
+};
+
+static const char *btf_kind_str(__u16 kind)
+{
+	if (kind > BTF_KIND_DATASEC)
+		return "UNKNOWN";
+	return btf_kind_str_mapping[kind];
+}
+
+static const char *btf_int_enc_str(__u8 encoding)
+{
+	switch (encoding) {
+	case 0:
+		return "(none)";
+	case BTF_INT_SIGNED:
+		return "SIGNED";
+	case BTF_INT_CHAR:
+		return "CHAR";
+	case BTF_INT_BOOL:
+		return "BOOL";
+	default:
+		return "UNKN";
+	}
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	switch (linkage) {
+	case BTF_VAR_STATIC:
+		return "static";
+	case BTF_VAR_GLOBAL_ALLOCATED:
+		return "global-alloc";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_func_linkage_str(const struct btf_type *t)
+{
+	switch (btf_vlen(t)) {
+	case BTF_FUNC_STATIC:
+		return "static";
+	case BTF_FUNC_GLOBAL:
+		return "global";
+	case BTF_FUNC_EXTERN:
+		return "extern";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_str(const struct btf *btf, __u32 off)
+{
+	if (!off)
+		return "(anon)";
+	return btf__str_by_offset(btf, off) ?: "(invalid)";
+}
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t;
+	int kind, i;
+	__u32 vlen;
+
+	t = btf__type_by_id(btf, id);
+	if (!t)
+		return -EINVAL;
+
+	vlen = btf_vlen(t);
+	kind = btf_kind(t);
+
+	fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off));
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s",
+			t->size, btf_int_offset(t), btf_int_bits(t),
+			btf_int_enc_str(btf_int_encoding(t)));
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+		fprintf(out, " type_id=%u", t->type);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *arr = btf_array(t);
+
+		fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u",
+			arr->type, arr->index_type, arr->nelems);
+		break;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, m++) {
+			__u32 bit_off, bit_sz;
+
+			bit_off = btf_member_bit_offset(t, i);
+			bit_sz = btf_member_bitfield_size(t, i);
+			fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u",
+				btf_str(btf, m->name_off), m->type, bit_off);
+			if (bit_sz)
+				fprintf(out, " bitfield_size=%u", bit_sz);
+		}
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		const struct btf_enum *v = btf_enum(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, v++) {
+			fprintf(out, "\n\t'%s' val=%u",
+				btf_str(btf, v->name_off), v->val);
+		}
+		break;
+	}
+	case BTF_KIND_FWD:
+		fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
+		break;
+	case BTF_KIND_FUNC:
+		fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t));
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = btf_params(t);
+
+		fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen);
+		for (i = 0; i < vlen; i++, p++) {
+			fprintf(out, "\n\t'%s' type_id=%u",
+				btf_str(btf, p->name_off), p->type);
+		}
+		break;
+	}
+	case BTF_KIND_VAR:
+		fprintf(out, " type_id=%u, linkage=%s",
+			t->type, btf_var_linkage_str(btf_var(t)->linkage));
+		break;
+	case BTF_KIND_DATASEC: {
+		const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, v++) {
+			fprintf(out, "\n\ttype_id=%u offset=%u size=%u",
+				v->type, v->offset, v->size);
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* Print raw BTF type dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_raw_dump(const struct btf *btf, int type_id)
+{
+	static char buf[16 * 1024];
+	FILE *buf_file;
+
+	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+	if (!buf_file) {
+		fprintf(stderr, "Failed to open memstream: %d\n", errno);
+		return NULL;
+	}
+
+	fprintf_btf_type_raw(buf_file, btf, type_id);
+	fflush(buf_file);
+	fclose(buf_file);
+
+	return buf;
+}
diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h
new file mode 100644
index 000000000000..2c9ce1b61dc9
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef __BTF_HELPERS_H
+#define __BTF_HELPERS_H
+
+#include <stdio.h>
+#include <bpf/btf.h>
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
+const char *btf_type_raw_dump(const struct btf *btf, int type_id);
+
+#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index 314e1e7c36df..f36da15b134f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
 #include <bpf/btf.h>
+#include "btf_helpers.h"
 
 static int duration = 0;
 
@@ -39,6 +40,8 @@ void test_btf_write() {
 	ASSERT_EQ(t->size, 4, "int_sz");
 	ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
 	ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 1),
+		     "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump");
 
 	/* invalid int size */
 	id = btf__add_int(btf, "bad sz int", 7, 0);
@@ -59,24 +62,32 @@ void test_btf_write() {
 	t = btf__type_by_id(btf, 2);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
 	ASSERT_EQ(t->type, 1, "ptr_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 2),
+		     "[2] PTR '(anon)' type_id=1", "raw_dump");
 
 	id = btf__add_const(btf, 5); /* points forward to restrict */
 	ASSERT_EQ(id, 3, "const_id");
 	t = btf__type_by_id(btf, 3);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
 	ASSERT_EQ(t->type, 5, "const_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 3),
+		     "[3] CONST '(anon)' type_id=5", "raw_dump");
 
 	id = btf__add_volatile(btf, 3);
 	ASSERT_EQ(id, 4, "volatile_id");
 	t = btf__type_by_id(btf, 4);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
 	ASSERT_EQ(t->type, 3, "volatile_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 4),
+		     "[4] VOLATILE '(anon)' type_id=3", "raw_dump");
 
 	id = btf__add_restrict(btf, 4);
 	ASSERT_EQ(id, 5, "restrict_id");
 	t = btf__type_by_id(btf, 5);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
 	ASSERT_EQ(t->type, 4, "restrict_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 5),
+		     "[5] RESTRICT '(anon)' type_id=4", "raw_dump");
 
 	/* ARRAY */
 	id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
@@ -86,6 +97,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
 	ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
 	ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 6),
+		     "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump");
 
 	/* STRUCT */
 	err = btf__add_field(btf, "field", 1, 0, 0);
@@ -113,6 +126,10 @@ void test_btf_write() {
 	ASSERT_EQ(m->type, 1, "f2_type");
 	ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
 	ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 7),
+		     "[7] STRUCT 's1' size=8 vlen=2\n"
+		     "\t'f1' type_id=1 bits_offset=0\n"
+		     "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump");
 
 	/* UNION */
 	id = btf__add_union(btf, "u1", 8);
@@ -136,6 +153,9 @@ void test_btf_write() {
 	ASSERT_EQ(m->type, 1, "f1_type");
 	ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
 	ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 8),
+		     "[8] UNION 'u1' size=8 vlen=1\n"
+		     "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump");
 
 	/* ENUM */
 	id = btf__add_enum(btf, "e1", 4);
@@ -156,6 +176,10 @@ void test_btf_write() {
 	v = btf_enum(t) + 1;
 	ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
 	ASSERT_EQ(v->val, 2, "v2_val");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 9),
+		     "[9] ENUM 'e1' size=4 vlen=2\n"
+		     "\t'v1' val=1\n"
+		     "\t'v2' val=2", "raw_dump");
 
 	/* FWDs */
 	id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
@@ -164,6 +188,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
 	ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 10),
+		     "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump");
 
 	id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
 	ASSERT_EQ(id, 11, "union_fwd_id");
@@ -171,6 +197,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
 	ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 11),
+		     "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump");
 
 	id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
 	ASSERT_EQ(id, 12, "enum_fwd_id");
@@ -179,6 +207,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
 	ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
 	ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 12),
+		     "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump");
 
 	/* TYPEDEF */
 	id = btf__add_typedef(btf, "typedef1", 1);
@@ -187,6 +217,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
 	ASSERT_EQ(t->type, 1, "typedef_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 13),
+		     "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump");
 
 	/* FUNC & FUNC_PROTO */
 	id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
@@ -196,6 +228,8 @@ void test_btf_write() {
 	ASSERT_EQ(t->type, 15, "func_type");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
 	ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 14),
+		     "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump");
 
 	id = btf__add_func_proto(btf, 1);
 	ASSERT_EQ(id, 15, "func_proto_id");
@@ -214,6 +248,10 @@ void test_btf_write() {
 	p = btf_params(t) + 1;
 	ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
 	ASSERT_EQ(p->type, 2, "p2_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 15),
+		     "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+		     "\t'p1' type_id=1\n"
+		     "\t'p2' type_id=2", "raw_dump");
 
 	/* VAR */
 	id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
@@ -223,6 +261,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
 	ASSERT_EQ(t->type, 1, "var_type");
 	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 16),
+		     "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump");
 
 	/* DATASECT */
 	id = btf__add_datasec(btf, "datasec1", 12);
@@ -239,6 +279,9 @@ void test_btf_write() {
 	ASSERT_EQ(vi->type, 1, "v1_type");
 	ASSERT_EQ(vi->offset, 4, "v1_off");
 	ASSERT_EQ(vi->size, 8, "v1_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 17),
+		     "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+		     "\ttype_id=1 offset=4 size=8", "raw_dump");
 
 	btf__free(btf);
 }
-- 
cgit v1.2.3


From d8123624506cd62730c9cd9c7672c698e462703d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:57 -0800
Subject: libbpf: Fix BTF data layout checks and allow empty BTF

Make data section layout checks stricter, disallowing overlap of types and
strings data.

Additionally, allow BTFs with no type data. There is nothing inherently wrong
with having BTF with no types (put potentially with some strings). This could
be a situation with kernel module BTFs, if module doesn't introduce any new
type information.

Also fix invalid offset alignment check for btf->hdr->type_off.

Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-8-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 0258cf108c0a..20bb88e71f07 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -245,22 +245,18 @@ static int btf_parse_hdr(struct btf *btf)
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->type_off) {
-		pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
+	if (meta_left < hdr->str_off + hdr->str_len) {
+		pr_debug("Invalid BTF total size:%u\n", btf->raw_size);
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->str_off) {
-		pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
+	if (hdr->type_off + hdr->type_len > hdr->str_off) {
+		pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
+			 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
 		return -EINVAL;
 	}
 
-	if (hdr->type_off >= hdr->str_off) {
-		pr_debug("BTF type section offset >= string section offset. No type?\n");
-		return -EINVAL;
-	}
-
-	if (hdr->type_off & 0x02) {
+	if (hdr->type_off % 4) {
 		pr_debug("BTF type section is not aligned to 4 bytes\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From f86524efcf9e3f3a7cf75ebcd82cf8f58ec716cc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:58 -0800
Subject: libbpf: Support BTF dedup of split BTFs

Add support for deduplication split BTFs. When deduplicating split BTF, base
BTF is considered to be immutable and can't be modified or adjusted. 99% of
BTF deduplication logic is left intact (module some type numbering adjustments).
There are only two differences.

First, each type in base BTF gets hashed (expect VAR and DATASEC, of course,
those are always considered to be self-canonical instances) and added into
a table of canonical table candidates. Hashing is a shallow, fast operation,
so mostly eliminates the overhead of having entire base BTF to be a part of
BTF dedup.

Second difference is very critical and subtle. While deduplicating split BTF
types, it is possible to discover that one of immutable base BTF BTF_KIND_FWD
types can and should be resolved to a full STRUCT/UNION type from the split
BTF part.  This is, obviously, can't happen because we can't modify the base
BTF types anymore. So because of that, any type in split BTF that directly or
indirectly references that newly-to-be-resolved FWD type can't be considered
to be equivalent to the corresponding canonical types in base BTF, because
that would result in a loss of type resolution information. So in such case,
split BTF types will be deduplicated separately and will cause some
duplication of type information, which is unavoidable.

With those two changes, the rest of the algorithm manages to deduplicate split
BTF correctly, pointing all the duplicates to their canonical counter-parts in
base BTF, but also is deduplicating whatever unique types are present in split
BTF on their own.

Also, theoretically, split BTF after deduplication could end up with either
empty type section or empty string section. This is handled by libbpf
correctly in one of previous patches in the series.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-9-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 221 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 168 insertions(+), 53 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 20bb88e71f07..8d04d9becb67 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -2718,6 +2718,7 @@ struct btf_dedup;
 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 				       const struct btf_dedup_opts *opts);
 static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_prep(struct btf_dedup *d);
 static int btf_dedup_strings(struct btf_dedup *d);
 static int btf_dedup_prim_types(struct btf_dedup *d);
 static int btf_dedup_struct_types(struct btf_dedup *d);
@@ -2876,6 +2877,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
+	err = btf_dedup_prep(d);
+	if (err) {
+		pr_debug("btf_dedup_prep failed:%d\n", err);
+		goto done;
+	}
 	err = btf_dedup_strings(d);
 	if (err < 0) {
 		pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -2938,6 +2944,13 @@ struct btf_dedup {
 	__u32 *hypot_list;
 	size_t hypot_cnt;
 	size_t hypot_cap;
+	/* Whether hypothetical mapping, if successful, would need to adjust
+	 * already canonicalized types (due to a new forward declaration to
+	 * concrete type resolution). In such case, during split BTF dedup
+	 * candidate type would still be considered as different, because base
+	 * BTF is considered to be immutable.
+	 */
+	bool hypot_adjust_canon;
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
 	/* temporary strings deduplication state */
@@ -2985,6 +2998,7 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
 	for (i = 0; i < d->hypot_cnt; i++)
 		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
 	d->hypot_cnt = 0;
+	d->hypot_adjust_canon = false;
 }
 
 static void btf_dedup_free(struct btf_dedup *d)
@@ -3024,7 +3038,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
 	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
-	int i, err = 0;
+	int i, err = 0, type_cnt;
 
 	if (!d)
 		return ERR_PTR(-ENOMEM);
@@ -3044,14 +3058,15 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 		goto done;
 	}
 
-	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	type_cnt = btf__get_nr_types(btf) + 1;
+	d->map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->map) {
 		err = -ENOMEM;
 		goto done;
 	}
 	/* special BTF "void" type is made canonical immediately */
 	d->map[0] = 0;
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i < type_cnt; i++) {
 		struct btf_type *t = btf_type_by_id(d->btf, i);
 
 		/* VAR and DATASEC are never deduped and are self-canonical */
@@ -3061,12 +3076,12 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 			d->map[i] = BTF_UNPROCESSED_ID;
 	}
 
-	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	d->hypot_map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->hypot_map) {
 		err = -ENOMEM;
 		goto done;
 	}
-	for (i = 0; i <= btf->nr_types; i++)
+	for (i = 0; i < type_cnt; i++)
 		d->hypot_map[i] = BTF_UNPROCESSED_ID;
 
 done:
@@ -3090,8 +3105,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 	int i, j, r, rec_size;
 	struct btf_type *t;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		t = btf_type_by_id(d->btf, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		t = btf_type_by_id(d->btf, d->btf->start_id + i);
 		r = fn(&t->name_off, ctx);
 		if (r)
 			return r;
@@ -3174,15 +3189,27 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
 	struct btf_dedup *d = ctx;
+	__u32 str_off = *str_off_ptr;
 	long old_off, new_off, len;
 	const char *s;
 	void *p;
 	int err;
 
-	if (*str_off_ptr == 0)
+	/* don't touch empty string or string in main BTF */
+	if (str_off == 0 || str_off < d->btf->start_str_off)
 		return 0;
 
-	s = btf__str_by_offset(d->btf, *str_off_ptr);
+	s = btf__str_by_offset(d->btf, str_off);
+	if (d->btf->base_btf) {
+		err = btf__find_str(d->btf->base_btf, s);
+		if (err >= 0) {
+			*str_off_ptr = err;
+			return 0;
+		}
+		if (err != -ENOENT)
+			return err;
+	}
+
 	len = strlen(s) + 1;
 
 	new_off = d->strs_len;
@@ -3199,11 +3226,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
 			      HASHMAP_ADD, (const void **)&old_off, NULL);
 	if (err == -EEXIST) {
-		*str_off_ptr = old_off;
+		*str_off_ptr = d->btf->start_str_off + old_off;
 	} else if (err) {
 		return err;
 	} else {
-		*str_off_ptr = new_off;
+		*str_off_ptr = d->btf->start_str_off + new_off;
 		d->strs_len += len;
 	}
 	return 0;
@@ -3228,13 +3255,6 @@ static int btf_dedup_strings(struct btf_dedup *d)
 	if (d->btf->strs_deduped)
 		return 0;
 
-	s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
-	if (!s)
-		return -ENOMEM;
-	/* initial empty string */
-	s[0] = 0;
-	d->strs_len = 1;
-
 	/* temporarily switch to use btf_dedup's strs_data for strings for hash
 	 * functions; later we'll just transfer hashmap to struct btf as is,
 	 * along the strs_data
@@ -3248,13 +3268,22 @@ static int btf_dedup_strings(struct btf_dedup *d)
 		goto err_out;
 	}
 
-	/* insert empty string; we won't be looking it up during strings
-	 * dedup, but it's good to have it for generic BTF string lookups
-	 */
-	err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
-			      HASHMAP_ADD, NULL, NULL);
-	if (err)
-		goto err_out;
+	if (!d->btf->base_btf) {
+		s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+		if (!s)
+			return -ENOMEM;
+		/* initial empty string */
+		s[0] = 0;
+		d->strs_len = 1;
+
+		/* insert empty string; we won't be looking it up during strings
+		 * dedup, but it's good to have it for generic BTF string lookups
+		 */
+		err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
+				      HASHMAP_ADD, NULL, NULL);
+		if (err)
+			goto err_out;
+	}
 
 	/* remap string offsets */
 	err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
@@ -3549,6 +3578,66 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 	return true;
 }
 
+/* Prepare split BTF for deduplication by calculating hashes of base BTF's
+ * types and initializing the rest of the state (canonical type mapping) for
+ * the fixed base BTF part.
+ */
+static int btf_dedup_prep(struct btf_dedup *d)
+{
+	struct btf_type *t;
+	int type_id;
+	long h;
+
+	if (!d->btf->base_btf)
+		return 0;
+
+	for (type_id = 1; type_id < d->btf->start_id; type_id++) {
+		t = btf_type_by_id(d->btf, type_id);
+
+		/* all base BTF types are self-canonical by definition */
+		d->map[type_id] = type_id;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_VAR:
+		case BTF_KIND_DATASEC:
+			/* VAR and DATASEC are never hash/deduplicated */
+			continue;
+		case BTF_KIND_CONST:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+			h = btf_hash_common(t);
+			break;
+		case BTF_KIND_INT:
+			h = btf_hash_int(t);
+			break;
+		case BTF_KIND_ENUM:
+			h = btf_hash_enum(t);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			h = btf_hash_struct(t);
+			break;
+		case BTF_KIND_ARRAY:
+			h = btf_hash_array(t);
+			break;
+		case BTF_KIND_FUNC_PROTO:
+			h = btf_hash_fnproto(t);
+			break;
+		default:
+			pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id);
+			return -EINVAL;
+		}
+		if (btf_dedup_table_add(d, h, type_id))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Deduplicate primitive types, that can't reference other types, by calculating
  * their type signature hash and comparing them with any possible canonical
@@ -3642,8 +3731,8 @@ static int btf_dedup_prim_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_prim_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
@@ -3833,6 +3922,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		} else {
 			real_kind = cand_kind;
 			fwd_kind = btf_fwd_kind(canon_type);
+			/* we'd need to resolve base FWD to STRUCT/UNION */
+			if (fwd_kind == real_kind && canon_id < d->btf->start_id)
+				d->hypot_adjust_canon = true;
 		}
 		return fwd_kind == real_kind;
 	}
@@ -3870,8 +3962,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 			return 0;
 		cand_arr = btf_array(cand_type);
 		canon_arr = btf_array(canon_type);
-		eq = btf_dedup_is_equiv(d,
-			cand_arr->index_type, canon_arr->index_type);
+		eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type);
 		if (eq <= 0)
 			return eq;
 		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
@@ -3954,16 +4045,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
  */
 static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 {
-	__u32 cand_type_id, targ_type_id;
+	__u32 canon_type_id, targ_type_id;
 	__u16 t_kind, c_kind;
 	__u32 t_id, c_id;
 	int i;
 
 	for (i = 0; i < d->hypot_cnt; i++) {
-		cand_type_id = d->hypot_list[i];
-		targ_type_id = d->hypot_map[cand_type_id];
+		canon_type_id = d->hypot_list[i];
+		targ_type_id = d->hypot_map[canon_type_id];
 		t_id = resolve_type_id(d, targ_type_id);
-		c_id = resolve_type_id(d, cand_type_id);
+		c_id = resolve_type_id(d, canon_type_id);
 		t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
 		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
 		/*
@@ -3978,9 +4069,26 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 		 * stability is not a requirement for STRUCT/UNION equivalence
 		 * checks, though.
 		 */
+
+		/* if it's the split BTF case, we still need to point base FWD
+		 * to STRUCT/UNION in a split BTF, because FWDs from split BTF
+		 * will be resolved against base FWD. If we don't point base
+		 * canonical FWD to the resolved STRUCT/UNION, then all the
+		 * FWDs in split BTF won't be correctly resolved to a proper
+		 * STRUCT/UNION.
+		 */
 		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
 			d->map[c_id] = t_id;
-		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+
+		/* if graph equivalence determined that we'd need to adjust
+		 * base canonical types, then we need to only point base FWDs
+		 * to STRUCTs/UNIONs and do no more modifications. For all
+		 * other purposes the type graphs were not equivalent.
+		 */
+		if (d->hypot_adjust_canon)
+			continue;
+		
+		if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
 			d->map[t_id] = c_id;
 
 		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
@@ -4064,8 +4172,10 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 			return eq;
 		if (!eq)
 			continue;
-		new_id = cand_id;
 		btf_dedup_merge_hypot_map(d);
+		if (d->hypot_adjust_canon) /* not really equivalent */
+			continue;
+		new_id = cand_id;
 		break;
 	}
 
@@ -4080,8 +4190,8 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_struct_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_struct_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
@@ -4224,8 +4334,8 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_ref_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_ref_type(d, d->btf->start_id + i);
 		if (err < 0)
 			return err;
 	}
@@ -4249,39 +4359,44 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 static int btf_dedup_compact_types(struct btf_dedup *d)
 {
 	__u32 *new_offs;
-	__u32 next_type_id = 1;
+	__u32 next_type_id = d->btf->start_id;
+	const struct btf_type *t;
 	void *p;
-	int i, len;
+	int i, id, len;
 
 	/* we are going to reuse hypot_map to store compaction remapping */
 	d->hypot_map[0] = 0;
-	for (i = 1; i <= d->btf->nr_types; i++)
-		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+	/* base BTF types are not renumbered */
+	for (id = 1; id < d->btf->start_id; id++)
+		d->hypot_map[id] = id;
+	for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++)
+		d->hypot_map[id] = BTF_UNPROCESSED_ID;
 
 	p = d->btf->types_data;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		if (d->map[i] != i)
+	for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) {
+		if (d->map[id] != id)
 			continue;
 
-		len = btf_type_size(btf__type_by_id(d->btf, i));
+		t = btf__type_by_id(d->btf, id);
+		len = btf_type_size(t);
 		if (len < 0)
 			return len;
 
-		memmove(p, btf__type_by_id(d->btf, i), len);
-		d->hypot_map[i] = next_type_id;
-		d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data;
+		memmove(p, t, len);
+		d->hypot_map[id] = next_type_id;
+		d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data;
 		p += len;
 		next_type_id++;
 	}
 
 	/* shrink struct btf's internal types index and update btf_header */
-	d->btf->nr_types = next_type_id - 1;
+	d->btf->nr_types = next_type_id - d->btf->start_id;
 	d->btf->type_offs_cap = d->btf->nr_types;
 	d->btf->hdr->type_len = p - d->btf->types_data;
 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
 				       sizeof(*new_offs));
-	if (!new_offs)
+	if (d->btf->type_offs_cap && !new_offs)
 		return -ENOMEM;
 	d->btf->type_offs = new_offs;
 	d->btf->hdr->str_off = d->btf->hdr->type_len;
@@ -4413,8 +4528,8 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
 {
 	int i, r;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		r = btf_dedup_remap_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		r = btf_dedup_remap_type(d, d->btf->start_id + i);
 		if (r < 0)
 			return r;
 	}
-- 
cgit v1.2.3


From 6b6e6b1d09aa20c351a1fce0ea6402da436624a4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:59 -0800
Subject: libbpf: Accomodate DWARF/compiler bug with duplicated identical
 arrays

In some cases compiler seems to generate distinct DWARF types for identical
arrays within the same CU. That seems like a bug, but it's already out there
and breaks type graph equivalence checks, so accommodate it anyway by checking
for identical arrays, regardless of their type ID.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-10-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8d04d9becb67..2d0d064c6d31 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -3785,6 +3785,19 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
 	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
 }
 
+/* Check if given two types are identical ARRAY definitions */
+static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+	struct btf_type *t1, *t2;
+
+	t1 = btf_type_by_id(d->btf, id1);
+	t2 = btf_type_by_id(d->btf, id2);
+	if (!btf_is_array(t1) || !btf_is_array(t2))
+		return 0;
+
+	return btf_equal_array(t1, t2);
+}
+
 /*
  * Check equivalence of BTF type graph formed by candidate struct/union (we'll
  * call it "candidate graph" in this description for brevity) to a type graph
@@ -3895,8 +3908,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 	canon_id = resolve_fwd_id(d, canon_id);
 
 	hypot_type_id = d->hypot_map[canon_id];
-	if (hypot_type_id <= BTF_MAX_NR_TYPES)
-		return hypot_type_id == cand_id;
+	if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+		/* In some cases compiler will generate different DWARF types
+		 * for *identical* array type definitions and use them for
+		 * different fields within the *same* struct. This breaks type
+		 * equivalence check, which makes an assumption that candidate
+		 * types sub-graph has a consistent and deduped-by-compiler
+		 * types within a single CU. So work around that by explicitly
+		 * allowing identical array types here.
+		 */
+		return hypot_type_id == cand_id ||
+		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+	}
 
 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
 		return -ENOMEM;
-- 
cgit v1.2.3


From 232338fa2fb47726ab7c459419115a6ab6bfb3e3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:34:00 -0800
Subject: selftests/bpf: Add split BTF dedup selftests

Add selftests validating BTF deduplication for split BTF case. Add a helper
macro that allows to validate entire BTF with raw BTF dump, not just
type-by-type. This saves tons of code and complexity.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-11-andrii@kernel.org
---
 tools/testing/selftests/bpf/btf_helpers.c          |  59 ++++
 tools/testing/selftests/bpf/btf_helpers.h          |   7 +
 .../selftests/bpf/prog_tests/btf_dedup_split.c     | 325 +++++++++++++++++++++
 3 files changed, 391 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index abc3f6c04cfc..48f90490f922 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -3,6 +3,8 @@
 #include <stdio.h>
 #include <errno.h>
 #include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
 
 static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_UNKN]		= "UNKNOWN",
@@ -198,3 +200,60 @@ const char *btf_type_raw_dump(const struct btf *btf, int type_id)
 
 	return buf;
 }
+
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
+{
+	int i;
+	bool ok = true;
+
+	ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types");
+
+	for (i = 1; i <= nr_types; i++) {
+		if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
+			ok = false;
+	}
+
+	return ok;
+}
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+/* Print BTF-to-C dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_c_dump(const struct btf *btf)
+{
+	static char buf[16 * 1024];
+	FILE *buf_file;
+	struct btf_dump *d = NULL;
+	struct btf_dump_opts opts = {};
+	int err, i;
+
+	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+	if (!buf_file) {
+		fprintf(stderr, "Failed to open memstream: %d\n", errno);
+		return NULL;
+	}
+
+	opts.ctx = buf_file;
+	d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+	if (libbpf_get_error(d)) {
+		fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
+		return NULL;
+	}
+
+	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+		err = btf_dump__dump_type(d, i);
+		if (err) {
+			fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
+			return NULL;
+		}
+	}
+
+	fflush(buf_file);
+	fclose(buf_file);
+	return buf;
+}
diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h
index 2c9ce1b61dc9..295c0137d9bd 100644
--- a/tools/testing/selftests/bpf/btf_helpers.h
+++ b/tools/testing/selftests/bpf/btf_helpers.h
@@ -8,5 +8,12 @@
 
 int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
 const char *btf_type_raw_dump(const struct btf *btf, int type_id);
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]);
 
+#define VALIDATE_RAW_BTF(btf, raw_types...)				\
+	btf_validate_raw(btf,						\
+			 sizeof((const char *[]){raw_types})/sizeof(void *),\
+			 (const char *[]){raw_types})
+
+const char *btf_type_c_dump(const struct btf *btf);
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
new file mode 100644
index 000000000000..64554fd33547
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void test_split_simple() {
+	const struct btf_type *t;
+	struct btf *btf1, *btf2;
+	int str_off, err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 1);				/* [2] ptr to int */
+	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+	ASSERT_STREQ(btf_type_c_dump(btf1), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\n", "c_dump");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	/* pointer size should be "inherited" from main BTF */
+	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+	str_off = btf__find_str(btf2, "int");
+	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+	t = btf__type_by_id(btf2, 1);
+	if (!ASSERT_OK_PTR(t, "int_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_int(t), true, "int_kind");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 {	*/
+	btf__add_field(btf2, "f1", 6, 0, 0);		/*      struct s1 f1;	*/
+	btf__add_field(btf2, "f2", 5, 32, 0);		/*      int f2;		*/
+	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3;	*/
+							/* } */
+
+	/* duplicated int */
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [5] int */
+
+	/* duplicated struct s1 */
+	btf__add_struct(btf2, "s1", 4);			/* [6] struct s1 { */
+	btf__add_field(btf2, "f1", 5, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[4] STRUCT 's2' size=16 vlen=3\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=5 bits_offset=32\n"
+		"\t'f3' type_id=2 bits_offset=64",
+		"[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[6] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=5 bits_offset=0");
+
+	ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s1___2 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+	struct s1___2 f1;\n\
+	int f2;\n\
+	int *f3;\n\
+};\n\n", "c_dump");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[4] STRUCT 's2' size=16 vlen=3\n"
+		"\t'f1' type_id=3 bits_offset=0\n"
+		"\t'f2' type_id=1 bits_offset=32\n"
+		"\t'f3' type_id=2 bits_offset=64");
+
+	ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+	struct s1 f1;\n\
+	int f2;\n\
+	int *f3;\n\
+};\n\n", "c_dump");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+static void test_split_fwd_resolve() {
+	struct btf *btf1, *btf2;
+	int err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 4);				/* [2] ptr to struct s1 */
+	btf__add_ptr(btf1, 5);				/* [3] ptr to struct s2 */
+	btf__add_struct(btf1, "s1", 16);		/* [4] struct s1 { */
+	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf1, "f2", 3, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+	btf__add_struct(btf1, "s2", 4);			/* [5] struct s2 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
+	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
+	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
+	btf__add_ptr(btf2, 8);				/* [9] ptr to fwd struct s2 */
+	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[7] PTR '(anon)' type_id=10",
+		"[8] FWD 's2' fwd_kind=struct",
+		"[9] PTR '(anon)' type_id=8",
+		"[10] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+static void test_split_struct_duped() {
+	struct btf *btf1, *btf2;
+	int err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 5);				/* [2] ptr to struct s1 */
+	btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT);	/* [3] fwd for struct s2 */
+	btf__add_ptr(btf1, 3);				/* [4] ptr to fwd struct s2 */
+	btf__add_struct(btf1, "s1", 16);		/* [5] struct s1 { */
+	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf1, "f2", 4, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
+	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
+	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
+	btf__add_ptr(btf2, 11);				/* [9] ptr to struct s2 */
+	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+	btf__add_struct(btf2, "s2", 40);		/* [11] struct s2 {	*/
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1;	*/
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2;	*/
+	btf__add_field(btf2, "f3", 6, 128, 0);		/*      int f3;		*/
+	btf__add_field(btf2, "f4", 10, 192, 0);		/*      struct s1 f4;	*/
+							/* } */
+	btf__add_ptr(btf2, 8);				/* [12] ptr to fwd struct s2 */
+	btf__add_struct(btf2, "s3", 8);			/* [13] struct s3 { */
+	btf__add_field(btf2, "f1", 12, 0, 0);		/*      struct s2 *f1; (fwd) */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64",
+		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[7] PTR '(anon)' type_id=10",
+		"[8] FWD 's2' fwd_kind=struct",
+		"[9] PTR '(anon)' type_id=11",
+		"[10] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64",
+		"[11] STRUCT 's2' size=40 vlen=4\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64\n"
+		"\t'f3' type_id=6 bits_offset=128\n"
+		"\t'f4' type_id=10 bits_offset=192",
+		"[12] PTR '(anon)' type_id=8",
+		"[13] STRUCT 's3' size=8 vlen=1\n"
+		"\t'f1' type_id=12 bits_offset=0");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64",
+		"[6] PTR '(anon)' type_id=8",
+		"[7] PTR '(anon)' type_id=9",
+		"[8] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=7 bits_offset=64",
+		"[9] STRUCT 's2' size=40 vlen=4\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=7 bits_offset=64\n"
+		"\t'f3' type_id=1 bits_offset=128\n"
+		"\t'f4' type_id=8 bits_offset=192",
+		"[10] STRUCT 's3' size=8 vlen=1\n"
+		"\t'f1' type_id=7 bits_offset=0");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+void test_btf_dedup_split()
+{
+	if (test__start_subtest("split_simple"))
+		test_split_simple();
+	if (test__start_subtest("split_struct_duped"))
+		test_split_struct_duped();
+	if (test__start_subtest("split_fwd_resolve"))
+		test_split_fwd_resolve();
+}
-- 
cgit v1.2.3


From 75fa1777694c245c1e59ac774cb1d58a15ecefeb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:34:01 -0800
Subject: tools/bpftool: Add bpftool support for split BTF

Add ability to work with split BTF by providing extra -B flag, which allows to
specify the path to the base BTF file.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-12-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c  |  9 ++++++---
 tools/bpf/bpftool/main.c | 15 ++++++++++++++-
 tools/bpf/bpftool/main.h |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 8ab142ff5eac..c96b56e8e3a4 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -358,8 +358,12 @@ static int dump_btf_raw(const struct btf *btf,
 		}
 	} else {
 		int cnt = btf__get_nr_types(btf);
+		int start_id = 1;
 
-		for (i = 1; i <= cnt; i++) {
+		if (base_btf)
+			start_id = btf__get_nr_types(base_btf) + 1;
+
+		for (i = start_id; i <= cnt; i++) {
 			t = btf__type_by_id(btf, i);
 			dump_btf_type(btf, i, t);
 		}
@@ -438,7 +442,6 @@ static int do_dump(int argc, char **argv)
 		return -1;
 	}
 	src = GET_ARG();
-
 	if (is_prefix(src, "map")) {
 		struct bpf_map_info info = {};
 		__u32 len = sizeof(info);
@@ -499,7 +502,7 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		btf = btf__parse(*argv, NULL);
+		btf = btf__parse_split(*argv, base_btf);
 		if (IS_ERR(btf)) {
 			err = -PTR_ERR(btf);
 			btf = NULL;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 682daaa49e6a..b86f450e6fce 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -11,6 +11,7 @@
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -28,6 +29,7 @@ bool show_pinned;
 bool block_mount;
 bool verifier_logs;
 bool relaxed_maps;
+struct btf *base_btf;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 struct pinned_obj_table link_table;
@@ -391,6 +393,7 @@ int main(int argc, char **argv)
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
 		{ "debug",	no_argument,	NULL,	'd' },
+		{ "base-btf",	required_argument, NULL, 'B' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -407,7 +410,7 @@ int main(int argc, char **argv)
 	hash_init(link_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
+	while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -441,6 +444,15 @@ int main(int argc, char **argv)
 			libbpf_set_print(print_all_levels);
 			verifier_logs = true;
 			break;
+		case 'B':
+			base_btf = btf__parse(optarg, NULL);
+			if (libbpf_get_error(base_btf)) {
+				p_err("failed to parse base BTF at '%s': %ld\n",
+				      optarg, libbpf_get_error(base_btf));
+				base_btf = NULL;
+				return -1;
+			}
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
@@ -465,6 +477,7 @@ int main(int argc, char **argv)
 		delete_pinned_obj_table(&map_table);
 		delete_pinned_obj_table(&link_table);
 	}
+	btf__free(base_btf);
 
 	return ret;
 }
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index c46e52137b87..76e91641262b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@ extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
+extern struct btf *base_btf;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 extern struct pinned_obj_table link_table;
-- 
cgit v1.2.3


From c6bde958a62b8ca5ee8d2c1fe429aec4ad54efad Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Thu, 29 Oct 2020 21:14:42 +0100
Subject: bpf: Lift hashtab key_size limit

Currently key_size of hashtab is limited to MAX_BPF_STACK.
As the key of hashtab can also be a value from a per cpu map it can be
larger than MAX_BPF_STACK.

The use-case for this patch originates to implement allow/disallow
lists for files and file paths. The maximum length of file paths is
defined by PATH_MAX with 4096 chars including nul.
This limit exceeds MAX_BPF_STACK.

Changelog:

v5:
 - Fix cast overflow

v4:
 - Utilize BPF skeleton in tests
 - Rebase

v3:
 - Rebase

v2:
 - Add a test for bpf side

Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201029201442.596690-1-dev@der-flo.net
---
 .../selftests/bpf/prog_tests/hash_large_key.c      | 43 +++++++++++++++++++++
 .../selftests/bpf/progs/test_hash_large_key.c      | 44 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_maps.c            |  3 +-
 3 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/hash_large_key.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_hash_large_key.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/hash_large_key.c b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
new file mode 100644
index 000000000000..34684c0fc76d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_hash_large_key.skel.h"
+
+void test_hash_large_key(void)
+{
+	int err, value = 21, duration = 0, hash_map_fd;
+	struct test_hash_large_key *skel;
+
+	struct bigelement {
+		int a;
+		char b[4096];
+		long long c;
+	} key;
+	bzero(&key, sizeof(key));
+
+	skel = test_hash_large_key__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	hash_map_fd = bpf_map__fd(skel->maps.hash_map);
+	if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n"))
+		goto cleanup;
+
+	err = test_hash_large_key__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	key.c = 1;
+	err = bpf_map_lookup_elem(hash_map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	CHECK_FAIL(value != 42);
+
+cleanup:
+	test_hash_large_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
new file mode 100644
index 000000000000..473a22794a62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, struct bigelement);
+	__type(value, __u32);
+} hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bigelement);
+} key_map SEC(".maps");
+
+struct bigelement {
+	int a;
+	char b[4096];
+	long long c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_hash_large_key_test(void *ctx)
+{
+	int zero = 0, err = 1, value = 42;
+	struct bigelement *key;
+
+	key = bpf_map_lookup_elem(&key_map, &zero);
+	if (!key)
+		return 0;
+
+	key->c = 1;
+	if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY))
+		return 0;
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0d92ebcb335d..0ad3e6305ff0 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1223,9 +1223,10 @@ out_map_in_map:
 
 static void test_map_large(void)
 {
+
 	struct bigkey {
 		int a;
-		char b[116];
+		char b[4096];
 		long long c;
 	} key;
 	int fd, i, value;
-- 
cgit v1.2.3


From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:40 +0000
Subject: bpf: Implement task local storage

Similar to bpf_local_storage for sockets and inodes add local storage
for task_struct.

The life-cycle of storage is managed with the life-cycle of the
task_struct.  i.e. the storage is destroyed along with the owning task
with a callback to the bpf_task_storage_free from the task_free LSM
hook.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
the security blob which are now stackable and can co-exist with other
LSMs.

The userspace map operations can be done by using a pid fd as a key
passed to the lookup, update and delete operations.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org
---
 tools/include/uapi/linux/bpf.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..f4037b2161a6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 8885274d225985807deeb95de74642073c3b97bb Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:41 +0000
Subject: libbpf: Add support for task local storage

Updates the bpf_probe_map_type API to also support
BPF_MAP_TYPE_TASK_STORAGE similar to other local storage maps.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-4-kpsingh@chromium.org
---
 tools/lib/bpf/libbpf_probes.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5482a9b7ae2d..ecaae2927ab8 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -230,6 +230,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		break;
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
+	case BPF_MAP_TYPE_TASK_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
-- 
cgit v1.2.3


From 864ab0616dccf14e51618a24acc7cf23ddd2b98a Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:42 +0000
Subject: bpftool: Add support for task local storage

Updates the binary to handle the BPF_MAP_TYPE_TASK_STORAGE as
"task_storage" for printing and parsing. Also updates the documentation
and bash completion

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-5-kpsingh@chromium.org
---
 tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++-
 tools/bpf/bpftool/bash-completion/bpftool       | 2 +-
 tools/bpf/bpftool/map.c                         | 4 +++-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index dade10cdf295..3d52256ba75f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -50,7 +50,8 @@ MAP COMMANDS
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
+|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
+		| **task_storage** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 3f1da30c4da6..fdffbc64c65c 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -705,7 +705,7 @@ _bpftool()
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage' -- \
+                                struct_ops inode_storage task_storage' -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a7efbd84fbcc..b400364ee054 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -51,6 +51,7 @@ const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
 	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
+	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1464,7 +1465,8 @@ static int do_help(int argc, char **argv)
 		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
-		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
+		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
+		"		  task_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
-- 
cgit v1.2.3


From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:43 +0000
Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID

The currently available bpf_get_current_task returns an unsigned integer
which can be used along with BPF_CORE_READ to read data from
the task_struct but still cannot be used as an input argument to a
helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct.

In order to implement this helper a new return type, RET_PTR_TO_BTF_ID,
is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not
require checking the nullness of returned pointer.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org
---
 tools/include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4037b2161a6..9879d6793e90 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3779,6 +3779,14 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3939,6 +3947,7 @@ union bpf_attr {
 	FN(redirect_peer),		\
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From f0e5ba0bc481df77cf0afac2b33e420b33eeb463 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:44 +0000
Subject: bpf: Fix tests for local_storage

The {inode,sk}_storage_result checking if the correct value was retrieved
was being clobbered unconditionally by the return value of the
bpf_{inode,sk}_storage_delete call.

Also, consistently use the newly added BPF_LOCAL_STORAGE_GET_F_CREATE
flag.

Fixes: cd324d7abb3d ("bpf: Add selftests for local_storage")
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-7-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/progs/local_storage.c | 24 ++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 0758ba229ae0..09529e33be98 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -58,20 +58,22 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct dummy_storage *storage;
+	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
 	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-	if (storage->value == DUMMY_STORAGE_VALUE)
+	if (storage->value != DUMMY_STORAGE_VALUE)
 		inode_storage_result = -1;
 
-	inode_storage_result =
-		bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+	err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+	if (!err)
+		inode_storage_result = err;
 
 	return 0;
 }
@@ -82,19 +84,23 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct dummy_storage *storage;
+	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
 	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-	if (storage->value == DUMMY_STORAGE_VALUE)
+	if (storage->value != DUMMY_STORAGE_VALUE)
 		sk_storage_result = -1;
 
-	sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	if (!err)
+		sk_storage_result = err;
+
 	return 0;
 }
 
@@ -109,7 +115,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 		return 0;
 
 	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
@@ -131,7 +137,7 @@ int BPF_PROG(file_open, struct file *file)
 		return 0;
 
 	storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
-				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-- 
cgit v1.2.3


From a367efa71b3f5a53281ca9772f8bf43166dfdf5f Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:45 +0000
Subject: bpf: Update selftests for local_storage to use vmlinux.h

With the fixing of BTF pruning of embedded types being fixed, the test
can be simplified to use vmlinux.h

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-8-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/progs/local_storage.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 09529e33be98..ef3822bc7542 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -4,9 +4,8 @@
  * Copyright 2020 Google LLC.
  */
 
+#include "vmlinux.h"
 #include <errno.h>
-#include <linux/bpf.h>
-#include <stdbool.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -36,23 +35,6 @@ struct {
 	__type(value, struct dummy_storage);
 } sk_storage_map SEC(".maps");
 
-/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
- */
-struct sock {} __attribute__((preserve_access_index));
-struct sockaddr {} __attribute__((preserve_access_index));
-struct socket {
-	struct sock *sk;
-} __attribute__((preserve_access_index));
-
-struct inode {} __attribute__((preserve_access_index));
-struct dentry {
-	struct inode *d_inode;
-} __attribute__((preserve_access_index));
-struct file {
-	struct inode *f_inode;
-} __attribute__((preserve_access_index));
-
-
 SEC("lsm/inode_unlink")
 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
-- 
cgit v1.2.3


From 9cde3beeadb311d4b435a7d28d5ab72bcc5de65d Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:46 +0000
Subject: bpf: Add tests for task_local_storage

The test exercises the syscall based map operations by creating a pidfd
for the current process.

For verifying kernel / LSM functionality, the test implements a simple
MAC policy which denies an executable from unlinking itself. The LSM
program bprm_committed_creds sets a task_local_storage with a pointer to
the inode. This is then used to detect if the task is trying to unlink
itself in the inode_unlink LSM hook.

The test copies /bin/rm to /tmp and executes it in a child thread with
the intention of deleting itself. A successful test should prevent the
the running executable from deleting itself.

The bpf programs are also updated to call bpf_spin_{lock, unlock} to
trigger the verfier checks for spin locks.

The temporary file is cleaned up later in the test.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-9-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_local_storage.c  | 185 +++++++++++++++++++--
 tools/testing/selftests/bpf/progs/local_storage.c  |  61 ++++++-
 2 files changed, 226 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 91cd6f357246..4e7f6a4965f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -4,30 +4,161 @@
  * Copyright (C) 2020 Google LLC.
  */
 
+#include <asm-generic/errno-base.h>
+#include <sys/stat.h>
 #include <test_progs.h>
 #include <linux/limits.h>
 
 #include "local_storage.skel.h"
 #include "network_helpers.h"
 
-int create_and_unlink_file(void)
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 {
-	char fname[PATH_MAX] = "/tmp/fileXXXXXX";
-	int fd;
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
+				      loff_t *off_out, size_t len,
+				      unsigned int flags)
+{
+	return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out,
+		       len, flags);
+}
+
+static unsigned int duration;
+
+#define TEST_STORAGE_VALUE 0xbeefdead
 
-	fd = mkstemp(fname);
-	if (fd < 0)
-		return fd;
+struct storage {
+	void *inode;
+	unsigned int value;
+	/* Lock ensures that spin locked versions of local stoage operations
+	 * also work, most operations in this tests are still single threaded
+	 */
+	struct bpf_spin_lock lock;
+};
+
+/* Copies an rm binary to a temp file. dest is a mkstemp template */
+static int copy_rm(char *dest)
+{
+	int fd_in, fd_out = -1, ret = 0;
+	struct stat stat;
+
+	fd_in = open("/bin/rm", O_RDONLY);
+	if (fd_in < 0)
+		return -errno;
+
+	fd_out = mkstemp(dest);
+	if (fd_out < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = fstat(fd_in, &stat);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	/* Set executable permission on the copied file */
+	ret = chmod(dest, 0100);
+	if (ret == -1)
+		ret = -errno;
+
+out:
+	close(fd_in);
+	close(fd_out);
+	return ret;
+}
+
+/* Fork and exec the provided rm binary and return the exit code of the
+ * forked process and its pid.
+ */
+static int run_self_unlink(int *monitored_pid, const char *rm_path)
+{
+	int child_pid, child_status, ret;
+	int null_fd;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		null_fd = open("/dev/null", O_WRONLY);
+		dup2(null_fd, STDOUT_FILENO);
+		dup2(null_fd, STDERR_FILENO);
+		close(null_fd);
+
+		*monitored_pid = getpid();
+		/* Use the copied /usr/bin/rm to delete itself
+		 * /tmp/copy_of_rm /tmp/copy_of_rm.
+		 */
+		ret = execlp(rm_path, rm_path, rm_path, NULL);
+		if (ret)
+			exit(errno);
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
 
-	close(fd);
-	unlink(fname);
-	return 0;
+static bool check_syscall_operations(int map_fd, int obj_fd)
+{
+	struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
+		       lookup_val = { .value = 0, .lock = { 0 } };
+	int err;
+
+	/* Looking up an existing element should fail initially */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	/* Create a new element */
+	err = bpf_map_update_elem(map_fd, &obj_fd, &val,
+				  BPF_NOEXIST | BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* Lookup the newly created element */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
+		  errno))
+		return false;
+
+	/* Check the value of the newly created element */
+	if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem",
+		  "value got = %x errno:%d", lookup_val.value, val.value))
+		return false;
+
+	err = bpf_map_delete_elem(map_fd, &obj_fd);
+	if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* The lookup should fail, now that the element has been deleted */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	return true;
 }
 
 void test_test_local_storage(void)
 {
+	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
+	int err, serv_sk = -1, task_fd = -1;
 	struct local_storage *skel = NULL;
-	int err, duration = 0, serv_sk = -1;
 
 	skel = local_storage__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -37,12 +168,37 @@ void test_test_local_storage(void)
 	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
 		goto close_prog;
 
-	skel->bss->monitored_pid = getpid();
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (CHECK(task_fd < 0, "pidfd_open",
+		  "failed to get pidfd err:%d, errno:%d", task_fd, errno))
+		goto close_prog;
 
-	err = create_and_unlink_file();
-	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map),
+				      task_fd))
 		goto close_prog;
 
+	err = copy_rm(tmp_exec_path);
+	if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* Sets skel->bss->monitored_pid to the pid of the forked child
+	 * forks a child process that executes tmp_exec_path and tries to
+	 * unlink its executable. This operation should be denied by the loaded
+	 * LSM program.
+	 */
+	err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
+	if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err))
+		goto close_prog_unlink;
+
+	/* Set the process being monitored to be the current process */
+	skel->bss->monitored_pid = getpid();
+
+	/* Remove the temporary created executable */
+	err = unlink(tmp_exec_path);
+	if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path,
+		  errno))
+		goto close_prog_unlink;
+
 	CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
 	      "inode_local_storage not set\n");
 
@@ -55,6 +211,9 @@ void test_test_local_storage(void)
 
 	close(serv_sk);
 
+close_prog_unlink:
+	unlink(tmp_exec_path);
 close_prog:
+	close(task_fd);
 	local_storage__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index ef3822bc7542..3e3de130f28f 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -17,41 +17,64 @@ int monitored_pid = 0;
 int inode_storage_result = -1;
 int sk_storage_result = -1;
 
-struct dummy_storage {
+struct local_storage {
+	struct inode *exec_inode;
 	__u32 value;
+	struct bpf_spin_lock lock;
 };
 
 struct {
 	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC);
 	__type(key, int);
-	__type(value, struct dummy_storage);
+	__type(value, struct local_storage);
 } inode_storage_map SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
 	__type(key, int);
-	__type(value, struct dummy_storage);
+	__type(value, struct local_storage);
 } sk_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct local_storage);
+} task_storage_map SEC(".maps");
+
 SEC("lsm/inode_unlink")
 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
+	bool is_self_unlink;
 	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
+	storage = bpf_task_storage_get(&task_storage_map,
+				       bpf_get_current_task_btf(), 0, 0);
+	if (storage) {
+		/* Don't let an executable delete itself */
+		bpf_spin_lock(&storage->lock);
+		is_self_unlink = storage->exec_inode == victim->d_inode;
+		bpf_spin_unlock(&storage->lock);
+		if (is_self_unlink)
+			return -EPERM;
+	}
+
 	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
 					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	if (storage->value != DUMMY_STORAGE_VALUE)
 		inode_storage_result = -1;
+	bpf_spin_unlock(&storage->lock);
 
 	err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
 	if (!err)
@@ -65,7 +88,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 	     int addrlen)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 	int err;
 
 	if (pid != monitored_pid)
@@ -76,8 +99,10 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	if (storage->value != DUMMY_STORAGE_VALUE)
 		sk_storage_result = -1;
+	bpf_spin_unlock(&storage->lock);
 
 	err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
 	if (!err)
@@ -91,7 +116,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	     int protocol, int kern)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 
 	if (pid != monitored_pid)
 		return 0;
@@ -101,7 +126,9 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	storage->value = DUMMY_STORAGE_VALUE;
+	bpf_spin_unlock(&storage->lock);
 
 	return 0;
 }
@@ -110,7 +137,7 @@ SEC("lsm/file_open")
 int BPF_PROG(file_open, struct file *file)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 
 	if (pid != monitored_pid)
 		return 0;
@@ -123,6 +150,26 @@ int BPF_PROG(file_open, struct file *file)
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	storage->value = DUMMY_STORAGE_VALUE;
+	bpf_spin_unlock(&storage->lock);
 	return 0;
 }
+
+/* This uses the local storage to remember the inode of the binary that a
+ * process was originally executing.
+ */
+SEC("lsm/bprm_committed_creds")
+void BPF_PROG(exec, struct linux_binprm *bprm)
+{
+	struct local_storage *storage;
+
+	storage = bpf_task_storage_get(&task_storage_map,
+				       bpf_get_current_task_btf(), 0,
+				       BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (storage) {
+		bpf_spin_lock(&storage->lock);
+		storage->exec_inode = bprm->file->f_inode;
+		bpf_spin_unlock(&storage->lock);
+	}
+}
-- 
cgit v1.2.3


From 4170bc6baa5446e1d85e0b7647ea54ba72aa85c4 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:47 +0000
Subject: bpf: Exercise syscall operations for inode and sk storage

Use the check_syscall_operations added for task_local_storage to
exercise syscall operations for other local storage maps:

* Check the absence of an element for the given fd.
* Create a new element, retrieve and compare its value.
* Delete the element and check again for absence.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-10-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_local_storage.c       | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 4e7f6a4965f2..5fda45982be0 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -157,7 +157,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
 void test_test_local_storage(void)
 {
 	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
-	int err, serv_sk = -1, task_fd = -1;
+	int err, serv_sk = -1, task_fd = -1, rm_fd = -1;
 	struct local_storage *skel = NULL;
 
 	skel = local_storage__open_and_load();
@@ -181,6 +181,15 @@ void test_test_local_storage(void)
 	if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno))
 		goto close_prog;
 
+	rm_fd = open(tmp_exec_path, O_RDONLY);
+	if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d",
+		  tmp_exec_path, rm_fd, errno))
+		goto close_prog;
+
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
+				      rm_fd))
+		goto close_prog;
+
 	/* Sets skel->bss->monitored_pid to the pid of the forked child
 	 * forks a child process that executes tmp_exec_path and tries to
 	 * unlink its executable. This operation should be denied by the loaded
@@ -209,11 +218,15 @@ void test_test_local_storage(void)
 	CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
 	      "sk_local_storage not set\n");
 
-	close(serv_sk);
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
+				      serv_sk))
+		goto close_prog;
 
 close_prog_unlink:
 	unlink(tmp_exec_path);
 close_prog:
+	close(serv_sk);
+	close(rm_fd);
 	close(task_fd);
 	local_storage__destroy(skel);
 }
-- 
cgit v1.2.3


From 21584e6a92bd2a85411793c0da3d48ab327e9b72 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 4 Nov 2020 15:30:40 +0200
Subject: selftests: netdevsim: Add test for nexthop offload API

Test various aspects of the nexthop offload API on top of the netdevsim
implementation. Both good and bad flows are tested.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/netdevsim/nexthop.sh     | 436 +++++++++++++++++++++
 1 file changed, 436 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/nexthop.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
new file mode 100755
index 000000000000..be0c1b5ee6b8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the nexthop offload API. It makes use of netdevsim
+# which registers a listener to the nexthop notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	nexthop_single_add_test
+	nexthop_single_add_err_test
+	nexthop_group_add_test
+	nexthop_group_add_err_test
+	nexthop_group_replace_test
+	nexthop_group_replace_err_test
+	nexthop_single_replace_test
+	nexthop_single_replace_err_test
+	nexthop_single_in_group_replace_test
+	nexthop_single_in_group_replace_err_test
+	nexthop_single_in_group_delete_test
+	nexthop_single_in_group_delete_err_test
+	nexthop_replay_test
+	nexthop_replay_err_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+nexthop_check()
+{
+	local nharg="$1"; shift
+	local expected="$1"; shift
+
+	out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
+	if [[ "$out" != "$expected" ]]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_resource_check()
+{
+	local expected_occ=$1; shift
+
+	occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
+		| jq '.[][][] | select(.name=="nexthops") | .["occ"]')
+
+	if [ $expected_occ -ne $occ ]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_resource_set()
+{
+	local size=$1; shift
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size $size
+	$DEVLINK dev reload $DEVLINK_DEV
+}
+
+nexthop_single_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 1
+	nexthop_resource_check 0
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Single nexthop add and delete"
+}
+
+nexthop_single_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop addition succeeded when should fail"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,20/2,39
+	nexthop_check "id 10" "id 10 group 1,20/2,39 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_resource_check 61
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3
+	nexthop_check "id 10" "id 10 group 1/2/3 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3 &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_err_test()
+{
+	RET=0
+
+	# This is supposed to cause the replace to fail because the new nexthop
+	# is programmed before deleting the replaced one.
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replace succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Single nexthop replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_delete_err_test()
+{
+	RET=0
+
+	# First, nexthop 1 will be deleted, which will reduce the occupancy to
+	# 5. Afterwards, a replace notification will be sent for nexthop group
+	# 10 with only two nexthops. Since the new group is allocated before
+	# the old is deleted, the replacement will fail as it will result in an
+	# occupancy of 7.
+	nexthop_resource_set 6
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3
+
+	$IP nexthop del id 1
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_replay_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after reload"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop replay"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_replay_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	# Reduce size of nexthop resource so that reload will fail.
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 3
+	$DEVLINK dev reload $DEVLINK_DEV &> /dev/null
+	check_fail $? "Reload succeeded when should fail"
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	log_test "Nexthop replay failure"
+
+	$IP nexthop flush &> /dev/null
+}
+
+setup_prepare()
+{
+	local netdev
+
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	IP="ip -netns testns1"
+	DEVLINK="devlink -N testns1"
+
+	$IP link add name dummy1 up type dummy
+	$IP address add 192.0.2.1/24 dev dummy1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From f055f355faf1991ef4e6b3c3517f8f2fc247805e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 6 Nov 2020 12:33:46 -0800
Subject: selftests/bpf: Fix selftest build with old libc

pidfd_open was added in 2019. Some versions of libc library don't define it.
Define it manually if it's not available.

Reported-by: Sergei Iudin <siudin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 5fda45982be0..fcca7ba1f368 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -12,6 +12,10 @@
 #include "local_storage.skel.h"
 #include "network_helpers.h"
 
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
 static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 {
 	return syscall(__NR_pidfd_open, pid, flags);
-- 
cgit v1.2.3


From 899f317e4886f916ed21027177177c11b577cea1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 9 Sep 2020 12:27:03 -0700
Subject: rcuscale: Add RCU Tasks Trace

This commit adds the ability to test performance and scalability of RCU
Tasks Trace updaters.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/configs/rcuscale/CFcommon        |  3 +++
 .../testing/selftests/rcutorture/configs/rcuscale/TRACE01 | 15 +++++++++++++++
 .../selftests/rcutorture/configs/rcuscale/TRACE01.boot    |  1 +
 3 files changed, 19 insertions(+)
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 87caa0e932c7..90942bb5bebc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,2 +1,5 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_TASKS_RCU_GENERIC=y
+CONFIG_TASKS_RCU=y
+CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
new file mode 100644
index 000000000000..e6baa2fbaeb3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -0,0 +1,15 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
new file mode 100644
index 000000000000..af0aff1457a4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
@@ -0,0 +1 @@
+rcuscale.scale_type=tasks-tracing
-- 
cgit v1.2.3


From 45c7b962014da36c2ac1aee6e5014b644ba37a84 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 9 Sep 2020 22:24:57 -0700
Subject: rcuscale: Avoid divide by zero

The rcuscale test module does not use batches, so there is only
ever one batch.  This commit therefore informs the kvm-recheck-rcuscale.sh
script of this fact of life.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index aa745152a525..b582113178ac 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -32,7 +32,7 @@ sed -e 's/^\[[^]]*]//' < $i/console.log |
 awk '
 /-scale: .* gps: .* batches:/ {
 	ngps = $9;
-	nbatches = $11;
+	nbatches = 1;
 }
 
 /-scale: .*writer-duration/ {
-- 
cgit v1.2.3


From 8d68e68a781db80606c8e8f3e4383be6974878fd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 16 Sep 2020 10:10:10 -0700
Subject: torture: Exclude "NOHZ tick-stop error" from fatal errors

The "NOHZ tick-stop error: Non-RCU local softirq work is pending"
warning happens frequently and appears to be irrelevant to the various
torture tests.  This commit therefore filters it out.

If there proves to be a need to pay attention to it a later commit will
add an "advice" category to allow the user to immediately see that
although something happened, it was not an indictment of the system
being tortured.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/console-badness.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index 0e4c0b2eb7f0..80ae7f08b363 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -13,4 +13,5 @@
 egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
 grep -v 'ODEBUG: ' |
 grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console'
+grep -v 'Warning: unable to open an initial console' |
+grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler'
-- 
cgit v1.2.3


From 6f26d010e678249367cc00b5a827c3731c8138f3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Sep 2020 12:34:01 -0700
Subject: rcutorture: Adjust scenarios SRCU-t and SRCU-u to make kconfig happy

The SRCU-u scenario expects to enable lockdep but to also disable the
CONFIG_PREEMPT_COUNT kconfig option.  This no longer works.  This commit
therefore instead enables lockdep in SRCU-t, which then allows SRCU-u
to disable CONFIG_PREEMPT_COUNT.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-t | 3 ++-
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
index 6c78022c8cd8..d6557c38dfe4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -4,7 +4,8 @@ CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_TINY_SRCU=y
 CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 #CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
index c15ada821e45..6bc24e99862f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -4,7 +4,6 @@ CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_TINY_SRCU=y
 CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_PREEMPT_COUNT=n
-- 
cgit v1.2.3


From c64659ef29e3901be0900ec6fb0485fa3dbdcfd8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 18 Sep 2020 13:26:22 -0700
Subject: torture: Prevent jitter processes from delaying failed run

Even when the kernel panics and qemu dies, runs with jitter enabled will
continue uselessly until the jitter.sh processes terminate.  This can
be annoying if a planned one-hour run instead dies during boot.

This commit therefore kills the jitter.sh processes when the run ends
more than one minute prior to the termination time specified by the
kvm.sh --duration argument or its default.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 14 ++++++++++++++
 tools/testing/selftests/rcutorture/bin/kvm.sh            |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index d04966ab88cc..3cd03d01857c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -226,6 +226,20 @@ do
 				echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
 				ps -fp $killpid >> $resdir/Warnings 2>&1
 			fi
+			# Reduce probability of PID reuse by allowing a one-minute buffer
+			if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids"
+			then
+				awk < "$resdir/../jitter_pids" '
+				NF > 0 {
+					pidlist = pidlist " " $1;
+					n++;
+				}
+				END {
+					if (n > 0) {
+						print "kill " pidlist;
+					}
+				}' | sh
+			fi
 		else
 			echo ' ---' `date`: "Kernel done"
 		fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6eb1d3f6524d..5ad3882563ce 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -459,8 +459,11 @@ function dump(first, pastlast, batchnum)
 	print "if test -n \"$needqemurun\""
 	print "then"
 	print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
-	for (j = 0; j < njitter; j++)
+	print "\techo > " rd "jitter_pids"
+	for (j = 0; j < njitter; j++) {
 		print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+		print "\techo $! >> " rd "jitter_pids"
+	}
 	print "\twait"
 	print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
 	print "else"
-- 
cgit v1.2.3


From c1e06287583e5ec496e4c02bf5b319e5e41a1fd2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 21 Sep 2020 13:18:48 -0700
Subject: torture: Force weak-hashed pointers on console log

Although the rcutorture scripting now deals correctly with full-up
security-induced pointer obfuscation, it is still counter-productive for
kernel hackers who are analyzing console output.  This commit therefore
sets the debug_boot_weak_hash kernel boot parameter, which enables
printing of weak-hashed pointers for torture-test runs.

Please note that this change applies only to runs initiated by the
kvm.sh scripting.  If you are instead using modprobe and rmmod, it is
your responsibility to build and boot the underlying kernel to your taste.

Please note further that this change does not result in a security hole
in normal use.  The rcutorture testing runs with a negligible userspace,
no networking, and no user interaction.  Besides which, there is no data
of value that can be extracted from an rcutorture guest OS that could
not also be extracted from the host that this guest is running on.

Suggested-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/functions.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 51f3464b96d3..82663495fb38 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -169,6 +169,7 @@ identify_qemu () {
 # Output arguments for the qemu "-append" string based on CPU type
 # and the TORTURE_QEMU_INTERACTIVE environment variable.
 identify_qemu_append () {
+	echo debug_boot_weak_hash
 	local console=ttyS0
 	case "$1" in
 	qemu-system-x86_64|qemu-system-i386)
-- 
cgit v1.2.3


From 7de1ca35269ee20e40c35666c810cbaea528c719 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 22 Sep 2020 17:20:11 -0700
Subject: torture: Accept time units on kvm.sh --duration argument

The "--duration <minutes>" has worked well for a very long time, but
it can be inconvenient to compute the minutes for (say) a 28-hour run.
It can also be annoying to have to let a simple boot test run for a full
minute.  This commit therefore permits an "s" suffix to specify seconds,
"m" to specify minutes (which remains the default), "h" suffix to specify
hours, and "d" to specify days.

With this change, "--duration 5" still specifies that each scenario
run for five minutes, but "--duration 30s" runs for only 30 seconds,
"--duration 8h" runs for eight hours, and "--duration 2d" runs for
two days.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 5ad3882563ce..c348d962304f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -58,7 +58,7 @@ usage () {
 	echo "       --datestamp string"
 	echo "       --defconfig string"
 	echo "       --dryrun sched|script"
-	echo "       --duration minutes"
+	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
 	echo "       --gdb"
 	echo "       --help"
 	echo "       --interactive"
@@ -128,8 +128,20 @@ do
 		shift
 		;;
 	--duration)
-		checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
-		dur=$(($2*60))
+		checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+		mult=60
+		if echo "$2" | grep -q 's$'
+		then
+			mult=1
+		elif echo "$2" | grep -q 'h$'
+		then
+			mult=3600
+		elif echo "$2" | grep -q 'd$'
+		then
+			mult=86400
+		fi
+		ts=`echo $2 | sed -e 's/[smhd]$//'`
+		dur=$(($ts*mult))
 		shift
 		;;
 	--gdb)
-- 
cgit v1.2.3


From a5136f4ffb44f8c1a80406c5bfd4d233433398e6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 24 Sep 2020 08:52:33 -0700
Subject: torture: Allow alternative forms of kvm.sh command-line arguments

This commit allows --build-only as a synonym for --buildonly, --kconfigs
for --kconfig, and --kmake-args for --kmake-arg.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index c348d962304f..45d07b7b69f5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -93,7 +93,7 @@ do
 		TORTURE_BOOT_IMAGE="$2"
 		shift
 		;;
-	--buildonly)
+	--buildonly|--build-only)
 		TORTURE_BUILDONLY=1
 		;;
 	--configs|--config)
@@ -160,7 +160,7 @@ do
 		jitter="$2"
 		shift
 		;;
-	--kconfig)
+	--kconfig|--kconfigs)
 		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
 		TORTURE_KCONFIG_ARG="$2"
 		shift
@@ -171,7 +171,7 @@ do
 	--kcsan)
 		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
 		;;
-	--kmake-arg)
+	--kmake-arg|--kmake-args)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
 		shift
-- 
cgit v1.2.3


From 6c5b9de2c63b2f513a580c6c80d455350012e99b Mon Sep 17 00:00:00 2001
From: Samuel Hernandez <sam.hernandez.amador@gmail.com>
Date: Sun, 11 Oct 2020 14:22:31 -0400
Subject: rcutorture/nolibc: Fix a typo in header file

This fixes a typo. Before this, the AT_FDCWD macro would be defined
regardless of whether or not it's been defined before.

Signed-off-by: Samuel Hernandez <sam.hernandez.amador@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 2551e9b71167..d6d2623c99ad 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -231,7 +231,7 @@ struct rusage {
 #define DT_SOCK   12
 
 /* all the *at functions */
-#ifndef AT_FDWCD
+#ifndef AT_FDCWD
 #define AT_FDCWD             -100
 #endif
 
-- 
cgit v1.2.3


From 5be7d80deb80ceef50a6bd86d83c8fd62264778a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Oct 2020 11:42:17 -0700
Subject: torture: Make kvm-check-branches.sh use --allcpus

Currently the kvm-check-branches.sh script calculates the number of CPUs
and passes this to the kvm.sh --cpus command-line argument.  This works,
but this commit saves a line by instead using the new kvm.sh --allcpus
command-line argument.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 6e65c134e5f1..370406bbfeed 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -52,8 +52,7 @@ echo Results directory: $resdir/$ds
 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
 PATH=${KVM}/bin:$PATH; export PATH
 . functions.sh
-cpus="`identify_qemu_vcpus`"
-echo Using up to $cpus CPUs.
+echo Using all `identify_qemu_vcpus` CPUs.
 
 # Each pass through this loop does one command-line argument.
 for gitbr in $@
@@ -74,7 +73,7 @@ do
 		# Test the specified commit.
 		git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
 		echo git checkout return code: $? "(Commit $ntry: $i)"
-		kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
+		kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
 		ret=$?
 		echo kvm.sh return code $ret for commit $i from branch $gitbr
 
-- 
cgit v1.2.3


From 06dc8d4591b8d8ce0ece94474718b53f0a5c5de3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Tue, 20 Oct 2020 21:22:56 +0200
Subject: tools/nolibc:  Fix a spelling error in a comment

Fix a spelling in the comment line.

s/memry/memory/p

This is on linux-next.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index d6d2623c99ad..e61d36cd4e50 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -107,7 +107,7 @@ static int errno;
 #endif
 
 /* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memry page.
+ * because they all correspond to the highest addressable memory page.
  */
 #define MAX_ERRNO 4095
 
-- 
cgit v1.2.3


From 01f9e708d9eae6335ae9ff25ab09893c20727a55 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 2 Nov 2020 18:12:20 +0100
Subject: tools/rcutorture: Fix BUG parsing of console.log

For the rcutorture test summary log file console.log of virtual machines is
parsed. When a console.log contains "DEBUG", BUG counter is incremented
because regular expression does not handle to ignore DEBUG.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Reviewed-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/parse-console.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index e03338091a06..263b1be50008 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -133,7 +133,7 @@ then
 	then
 		summary="$summary  Warnings: $n_warn"
 	fi
-	n_bugs=`egrep -c 'BUG|Oops:' $file`
+	n_bugs=`egrep -c '\bBUG|Oops:' $file`
 	if test "$n_bugs" -ne 0
 	then
 		summary="$summary  Bugs: $n_bugs"
-- 
cgit v1.2.3


From ebb477cb2fb7a44ff600e0a7393bad906a0ecd80 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 11 Aug 2020 11:27:33 -0700
Subject: tools/memory-model: Document categories of ordering primitives

The Linux kernel has a number of categories of ordering primitives, which
are recorded in the LKMM implementation and hinted at by cheatsheet.txt.
But there is no overview of these categories, and such an overview
is needed in order to understand multithreaded LKMM litmus tests.
This commit therefore adds an ordering.txt as well as extracting a
control-dependencies.txt from memory-barriers.txt.  It also updates the
README file.

[ paulmck:  Apply Akira Yokosawa file-placement feedback. ]
[ paulmck:  Apply Alan Stern feedback. ]
[ paulmck:  Apply self-review feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/README            |  17 +
 .../Documentation/control-dependencies.txt         | 258 ++++++++++
 tools/memory-model/Documentation/ordering.txt      | 556 +++++++++++++++++++++
 3 files changed, 831 insertions(+)
 create mode 100644 tools/memory-model/Documentation/control-dependencies.txt
 create mode 100644 tools/memory-model/Documentation/ordering.txt

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
index 2d9539f19912..db90a26dbdf4 100644
--- a/tools/memory-model/Documentation/README
+++ b/tools/memory-model/Documentation/README
@@ -11,6 +11,12 @@ the material provided by documents earlier in this list.
 
 o	You are new to Linux-kernel concurrency: simple.txt
 
+o	You have some background in Linux-kernel concurrency, and would
+	like an overview of the types of low-level concurrency primitives
+	that the Linux kernel provides:  ordering.txt
+
+	Here, "low level" means atomic operations to single variables.
+
 o	You are familiar with the Linux-kernel concurrency primitives
 	that you need, and just want to get started with LKMM litmus
 	tests:  litmus-tests.txt
@@ -19,6 +25,9 @@ o	You are familiar with Linux-kernel concurrency, and would
 	like a detailed intuitive understanding of LKMM, including
 	situations involving more than two threads:  recipes.txt
 
+o	You would like a detailed understanding of what your compiler can
+	and cannot do to control dependencies:  control-dependencies.txt
+
 o	You are familiar with Linux-kernel concurrency and the use of
 	LKMM, and would like a quick reference:  cheatsheet.txt
 
@@ -41,6 +50,10 @@ README
 cheatsheet.txt
 	Quick-reference guide to the Linux-kernel memory model.
 
+control-dependencies.txt
+	Guide to preventing compiler optimizations from destroying
+	your control dependencies.
+
 explanation.txt
 	Detailed description of the memory model.
 
@@ -48,6 +61,10 @@ litmus-tests.txt
 	The format, features, capabilities, and limitations of the litmus
 	tests that LKMM can evaluate.
 
+ordering.txt
+	Overview of the Linux kernel's low-level memory-ordering
+	primitives by category.
+
 recipes.txt
 	Common memory-ordering patterns.
 
diff --git a/tools/memory-model/Documentation/control-dependencies.txt b/tools/memory-model/Documentation/control-dependencies.txt
new file mode 100644
index 000000000000..8b743d20fe27
--- /dev/null
+++ b/tools/memory-model/Documentation/control-dependencies.txt
@@ -0,0 +1,258 @@
+CONTROL DEPENDENCIES
+====================
+
+A major difficulty with control dependencies is that current compilers
+do not support them.  One purpose of this document is therefore to
+help you prevent your compiler from breaking your code.  However,
+control dependencies also pose other challenges, which leads to the
+second purpose of this document, namely to help you to avoid breaking
+your own code, even in the absence of help from your compiler.
+
+One such challenge is that control dependencies order only later stores.
+Therefore, a load-load control dependency will not preserve ordering
+unless a read memory barrier is provided.  Consider the following code:
+
+	q = READ_ONCE(a);
+	if (q)
+		p = READ_ONCE(b);
+
+This is not guaranteed to provide any ordering because some types of CPUs
+are permitted to predict the result of the load from "b".  This prediction
+can cause other CPUs to see this load as having happened before the load
+from "a".  This means that an explicit read barrier is required, for example
+as follows:
+
+	q = READ_ONCE(a);
+	if (q) {
+		smp_rmb();
+		p = READ_ONCE(b);
+	}
+
+However, stores are not speculated.  This means that ordering is
+(usually) guaranteed for load-store control dependencies, as in the
+following example:
+
+	q = READ_ONCE(a);
+	if (q)
+		WRITE_ONCE(b, 1);
+
+Control dependencies can pair with each other and with other types
+of ordering.  But please note that neither the READ_ONCE() nor the
+WRITE_ONCE() are optional.  Without the READ_ONCE(), the compiler might
+fuse the load from "a" with other loads.  Without the WRITE_ONCE(),
+the compiler might fuse the store to "b" with other stores.  Worse yet,
+the compiler might convert the store into a load and a check followed
+by a store, and this compiler-generated load would not be ordered by
+the control dependency.
+
+Furthermore, if the compiler is able to prove that the value of variable
+"a" is always non-zero, it would be well within its rights to optimize
+the original example by eliminating the "if" statement as follows:
+
+	q = a;
+	b = 1;  /* BUG: Compiler and CPU can both reorder!!! */
+
+So don't leave out either the READ_ONCE() or the WRITE_ONCE().
+In particular, although READ_ONCE() does force the compiler to emit a
+load, it does *not* force the compiler to actually use the loaded value.
+
+It is tempting to try use control dependencies to enforce ordering on
+identical stores on both branches of the "if" statement as follows:
+
+	q = READ_ONCE(a);
+	if (q) {
+		barrier();
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		barrier();
+		WRITE_ONCE(b, 1);
+		do_something_else();
+	}
+
+Unfortunately, current compilers will transform this as follows at high
+optimization levels:
+
+	q = READ_ONCE(a);
+	barrier();
+	WRITE_ONCE(b, 1);  /* BUG: No ordering vs. load from a!!! */
+	if (q) {
+		/* WRITE_ONCE(b, 1); -- moved up, BUG!!! */
+		do_something();
+	} else {
+		/* WRITE_ONCE(b, 1); -- moved up, BUG!!! */
+		do_something_else();
+	}
+
+Now there is no conditional between the load from "a" and the store to
+"b", which means that the CPU is within its rights to reorder them:  The
+conditional is absolutely required, and must be present in the final
+assembly code, after all of the compiler and link-time optimizations
+have been applied.  Therefore, if you need ordering in this example,
+you must use explicit memory ordering, for example, smp_store_release():
+
+	q = READ_ONCE(a);
+	if (q) {
+		smp_store_release(&b, 1);
+		do_something();
+	} else {
+		smp_store_release(&b, 1);
+		do_something_else();
+	}
+
+Without explicit memory ordering, control-dependency-based ordering is
+guaranteed only when the stores differ, for example:
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+The initial READ_ONCE() is still required to prevent the compiler from
+knowing too much about the value of "a".
+
+But please note that you need to be careful what you do with the local
+variable "q", otherwise the compiler might be able to guess the value
+and again remove the conditional branch that is absolutely required to
+preserve ordering.  For example:
+
+	q = READ_ONCE(a);
+	if (q % MAX) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+If MAX is compile-time defined to be 1, then the compiler knows that
+(q % MAX) must be equal to zero, regardless of the value of "q".
+The compiler is therefore within its rights to transform the above code
+into the following:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, 2);
+	do_something_else();
+
+Given this transformation, the CPU is not required to respect the ordering
+between the load from variable "a" and the store to variable "b".  It is
+tempting to add a barrier(), but this does not help.  The conditional
+is gone, and the barrier won't bring it back.  Therefore, if you need
+to relying on control dependencies to produce this ordering, you should
+make sure that MAX is greater than one, perhaps as follows:
+
+	q = READ_ONCE(a);
+	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
+	if (q % MAX) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+Please note once again that each leg of the "if" statement absolutely
+must store different values to "b".  As in previous examples, if the two
+values were identical, the compiler could pull this store outside of the
+"if" statement, destroying the control dependency's ordering properties.
+
+You must also be careful avoid relying too much on boolean short-circuit
+evaluation.  Consider this example:
+
+	q = READ_ONCE(a);
+	if (q || 1 > 0)
+		WRITE_ONCE(b, 1);
+
+Because the first condition cannot fault and the second condition is
+always true, the compiler can transform this example as follows, again
+destroying the control dependency's ordering:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, 1);
+
+This is yet another example showing the importance of preventing the
+compiler from out-guessing your code.  Again, although READ_ONCE() really
+does force the compiler to emit code for a given load, the compiler is
+within its rights to discard the loaded value.
+
+In addition, control dependencies apply only to the then-clause and
+else-clause of the "if" statement in question.  In particular, they do
+not necessarily order the code following the entire "if" statement:
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, 1);
+	} else {
+		WRITE_ONCE(b, 2);
+	}
+	WRITE_ONCE(c, 1);  /* BUG: No ordering against the read from "a". */
+
+It is tempting to argue that there in fact is ordering because the
+compiler cannot reorder volatile accesses and also cannot reorder
+the writes to "b" with the condition.  Unfortunately for this line
+of reasoning, the compiler might compile the two writes to "b" as
+conditional-move instructions, as in this fanciful pseudo-assembly
+language:
+
+	ld r1,a
+	cmp r1,$0
+	cmov,ne r4,$1
+	cmov,eq r4,$2
+	st r4,b
+	st $1,c
+
+The control dependencies would then extend only to the pair of cmov
+instructions and the store depending on them.  This means that a weakly
+ordered CPU would have no dependency of any sort between the load from
+"a" and the store to "c".  In short, control dependencies provide ordering
+only to the stores in the then-clause and else-clause of the "if" statement
+in question (including functions invoked by those two clauses), and not
+to code following that "if" statement.
+
+
+In summary:
+
+  (*) Control dependencies can order prior loads against later stores.
+      However, they do *not* guarantee any other sort of ordering:
+      Not prior loads against later loads, nor prior stores against
+      later anything.  If you need these other forms of ordering, use
+      smp_load_acquire(), smp_store_release(), or, in the case of prior
+      stores and later loads, smp_mb().
+
+  (*) If both legs of the "if" statement contain identical stores to
+      the same variable, then you must explicitly order those stores,
+      either by preceding both of them with smp_mb() or by using
+      smp_store_release().  Please note that it is *not* sufficient to use
+      barrier() at beginning and end of each leg of the "if" statement
+      because, as shown by the example above, optimizing compilers can
+      destroy the control dependency while respecting the letter of the
+      barrier() law.
+
+  (*) Control dependencies require at least one run-time conditional
+      between the prior load and the subsequent store, and this
+      conditional must involve the prior load.  If the compiler is able
+      to optimize the conditional away, it will have also optimized
+      away the ordering.  Careful use of READ_ONCE() and WRITE_ONCE()
+      can help to preserve the needed conditional.
+
+  (*) Control dependencies require that the compiler avoid reordering the
+      dependency into nonexistence.  Careful use of READ_ONCE() or
+      atomic{,64}_read() can help to preserve your control dependency.
+
+  (*) Control dependencies apply only to the then-clause and else-clause
+      of the "if" statement containing the control dependency, including
+      any functions that these two clauses call.  Control dependencies
+      do *not* apply to code beyond the end of that "if" statement.
+
+  (*) Control dependencies pair normally with other types of barriers.
+
+  (*) Control dependencies do *not* provide multicopy atomicity.  If you
+      need all the CPUs to agree on the ordering of a given store against
+      all other accesses, use smp_mb().
+
+  (*) Compilers do not understand control dependencies.  It is therefore
+      your job to ensure that they do not break your code.
diff --git a/tools/memory-model/Documentation/ordering.txt b/tools/memory-model/Documentation/ordering.txt
new file mode 100644
index 000000000000..9b0949d3f5ec
--- /dev/null
+++ b/tools/memory-model/Documentation/ordering.txt
@@ -0,0 +1,556 @@
+This document gives an overview of the categories of memory-ordering
+operations provided by the Linux-kernel memory model (LKMM).
+
+
+Categories of Ordering
+======================
+
+This section lists LKMM's three top-level categories of memory-ordering
+operations in decreasing order of strength:
+
+1.	Barriers (also known as "fences").  A barrier orders some or
+	all of the CPU's prior operations against some or all of its
+	subsequent operations.
+
+2.	Ordered memory accesses.  These operations order themselves
+	against some or all of the CPU's prior accesses or some or all
+	of the CPU's subsequent accesses, depending on the subcategory
+	of the operation.
+
+3.	Unordered accesses, as the name indicates, have no ordering
+	properties except to the extent that they interact with an
+	operation in the previous categories.  This being the real world,
+	some of these "unordered" operations provide limited ordering
+	in some special situations.
+
+Each of the above categories is described in more detail by one of the
+following sections.
+
+
+Barriers
+========
+
+Each of the following categories of barriers is described in its own
+subsection below:
+
+a.	Full memory barriers.
+
+b.	Read-modify-write (RMW) ordering augmentation barriers.
+
+c.	Write memory barrier.
+
+d.	Read memory barrier.
+
+e.	Compiler barrier.
+
+Note well that many of these primitives generate absolutely no code
+in kernels built with CONFIG_SMP=n.  Therefore, if you are writing
+a device driver, which must correctly order accesses to a physical
+device even in kernels built with CONFIG_SMP=n, please use the
+ordering primitives provided for that purpose.  For example, instead of
+smp_mb(), use mb().  See the "Linux Kernel Device Drivers" book or the
+https://lwn.net/Articles/698014/ article for more information.
+
+
+Full Memory Barriers
+--------------------
+
+The Linux-kernel primitives that provide full ordering include:
+
+o	The smp_mb() full memory barrier.
+
+o	Value-returning RMW atomic operations whose names do not end in
+	_acquire, _release, or _relaxed.
+
+o	RCU's grace-period primitives.
+
+First, the smp_mb() full memory barrier orders all of the CPU's prior
+accesses against all subsequent accesses from the viewpoint of all CPUs.
+In other words, all CPUs will agree that any earlier action taken
+by that CPU happened before any later action taken by that same CPU.
+For example, consider the following:
+
+	WRITE_ONCE(x, 1);
+	smp_mb(); // Order store to x before load from y.
+	r1 = READ_ONCE(y);
+
+All CPUs will agree that the store to "x" happened before the load
+from "y", as indicated by the comment.  And yes, please comment your
+memory-ordering primitives.  It is surprisingly hard to remember their
+purpose after even a few months.
+
+Second, some RMW atomic operations provide full ordering.  These
+operations include value-returning RMW atomic operations (that is, those
+with non-void return types) whose names do not end in _acquire, _release,
+or _relaxed.  Examples include atomic_add_return(), atomic_dec_and_test(),
+cmpxchg(), and xchg().  Note that conditional RMW atomic operations such
+as cmpxchg() are only guaranteed to provide ordering when they succeed.
+When RMW atomic operations provide full ordering, they partition the
+CPU's accesses into three groups:
+
+1.	All code that executed prior to the RMW atomic operation.
+
+2.	The RMW atomic operation itself.
+
+3.	All code that executed after the RMW atomic operation.
+
+All CPUs will agree that any operation in a given partition happened
+before any operation in a higher-numbered partition.
+
+In contrast, non-value-returning RMW atomic operations (that is, those
+with void return types) do not guarantee any ordering whatsoever.  Nor do
+value-returning RMW atomic operations whose names end in _relaxed.
+Examples of the former include atomic_inc() and atomic_dec(),
+while examples of the latter include atomic_cmpxchg_relaxed() and
+atomic_xchg_relaxed().  Similarly, value-returning non-RMW atomic
+operations such as atomic_read() do not guarantee full ordering, and
+are covered in the later section on unordered operations.
+
+Value-returning RMW atomic operations whose names end in _acquire or
+_release provide limited ordering, and will be described later in this
+document.
+
+Finally, RCU's grace-period primitives provide full ordering.  These
+primitives include synchronize_rcu(), synchronize_rcu_expedited(),
+synchronize_srcu() and so on.  However, these primitives have orders
+of magnitude greater overhead than smp_mb(), atomic_xchg(), and so on.
+Furthermore, RCU's grace-period primitives can only be invoked in
+sleepable contexts.  Therefore, RCU's grace-period primitives are
+typically instead used to provide ordering against RCU read-side critical
+sections, as documented in their comment headers.  But of course if you
+need a synchronize_rcu() to interact with readers, it costs you nothing
+to also rely on its additional full-memory-barrier semantics.  Just please
+carefully comment this, otherwise your future self will hate you.
+
+
+RMW Ordering Augmentation Barriers
+----------------------------------
+
+As noted in the previous section, non-value-returning RMW operations
+such as atomic_inc() and atomic_dec() guarantee no ordering whatsoever.
+Nevertheless, a number of popular CPU families, including x86, provide
+full ordering for these primitives.  One way to obtain full ordering on
+all architectures is to add a call to smp_mb():
+
+	WRITE_ONCE(x, 1);
+	atomic_inc(&my_counter);
+	smp_mb(); // Inefficient on x86!!!
+	r1 = READ_ONCE(y);
+
+This works, but the added smp_mb() adds needless overhead for
+x86, on which atomic_inc() provides full ordering all by itself.
+The smp_mb__after_atomic() primitive can be used instead:
+
+	WRITE_ONCE(x, 1);
+	atomic_inc(&my_counter);
+	smp_mb__after_atomic(); // Order store to x before load from y.
+	r1 = READ_ONCE(y);
+
+The smp_mb__after_atomic() primitive emits code only on CPUs whose
+atomic_inc() implementations do not guarantee full ordering, thus
+incurring no unnecessary overhead on x86.  There are a number of
+variations on the smp_mb__*() theme:
+
+o	smp_mb__before_atomic(), which provides full ordering prior
+	to an unordered RMW atomic operation.
+
+o	smp_mb__after_atomic(), which, as shown above, provides full
+	ordering subsequent to an unordered RMW atomic operation.
+
+o	smp_mb__after_spinlock(), which provides full ordering subsequent
+	to a successful spinlock acquisition.  Note that spin_lock() is
+	always successful but spin_trylock() might not be.
+
+o	smp_mb__after_srcu_read_unlock(), which provides full ordering
+	subsequent to an srcu_read_unlock().
+
+It is bad practice to place code between the smp__*() primitive and the
+operation whose ordering that it is augmenting.  The reason is that the
+ordering of this intervening code will differ from one CPU architecture
+to another.
+
+
+Write Memory Barrier
+--------------------
+
+The Linux kernel's write memory barrier is smp_wmb().  If a CPU executes
+the following code:
+
+	WRITE_ONCE(x, 1);
+	smp_wmb();
+	WRITE_ONCE(y, 1);
+
+Then any given CPU will see the write to "x" has having happened before
+the write to "y".  However, you are usually better off using a release
+store, as described in the "Release Operations" section below.
+
+Note that smp_wmb() might fail to provide ordering for unmarked C-language
+stores because profile-driven optimization could determine that the
+value being overwritten is almost always equal to the new value.  Such a
+compiler might then reasonably decide to transform "x = 1" and "y = 1"
+as follows:
+
+	if (x != 1)
+		x = 1;
+	smp_wmb(); // BUG: does not order the reads!!!
+	if (y != 1)
+		y = 1;
+
+Therefore, if you need to use smp_wmb() with unmarked C-language writes,
+you will need to make sure that none of the compilers used to build
+the Linux kernel carry out this sort of transformation, both now and in
+the future.
+
+
+Read Memory Barrier
+-------------------
+
+The Linux kernel's read memory barrier is smp_rmb().  If a CPU executes
+the following code:
+
+	r0 = READ_ONCE(y);
+	smp_rmb();
+	r1 = READ_ONCE(x);
+
+Then any given CPU will see the read from "y" as having preceded the read from
+"x".  However, you are usually better off using an acquire load, as described
+in the "Acquire Operations" section below.
+
+Compiler Barrier
+----------------
+
+The Linux kernel's compiler barrier is barrier().  This primitive
+prohibits compiler code-motion optimizations that might move memory
+references across the point in the code containing the barrier(), but
+does not constrain hardware memory ordering.  For example, this can be
+used to prevent to compiler from moving code across an infinite loop:
+
+	WRITE_ONCE(x, 1);
+	while (dontstop)
+		barrier();
+	r1 = READ_ONCE(y);
+
+Without the barrier(), the compiler would be within its rights to move the
+WRITE_ONCE() to follow the loop.  This code motion could be problematic
+in the case where an interrupt handler terminates the loop.  Another way
+to handle this is to use READ_ONCE() for the load of "dontstop".
+
+Note that the barriers discussed previously use barrier() or its low-level
+equivalent in their implementations.
+
+
+Ordered Memory Accesses
+=======================
+
+The Linux kernel provides a wide variety of ordered memory accesses:
+
+a.	Release operations.
+
+b.	Acquire operations.
+
+c.	RCU read-side ordering.
+
+d.	Control dependencies.
+
+Each of the above categories has its own section below.
+
+
+Release Operations
+------------------
+
+Release operations include smp_store_release(), atomic_set_release(),
+rcu_assign_pointer(), and value-returning RMW operations whose names
+end in _release.  These operations order their own store against all
+of the CPU's prior memory accesses.  Release operations often provide
+improved readability and performance compared to explicit barriers.
+For example, use of smp_store_release() saves a line compared to the
+smp_wmb() example above:
+
+	WRITE_ONCE(x, 1);
+	smp_store_release(&y, 1);
+
+More important, smp_store_release() makes it easier to connect up the
+different pieces of the concurrent algorithm.  The variable stored to
+by the smp_store_release(), in this case "y", will normally be used in
+an acquire operation in other parts of the concurrent algorithm.
+
+To see the performance advantages, suppose that the above example read
+from "x" instead of writing to it.  Then an smp_wmb() could not guarantee
+ordering, and an smp_mb() would be needed instead:
+
+	r1 = READ_ONCE(x);
+	smp_mb();
+	WRITE_ONCE(y, 1);
+
+But smp_mb() often incurs much higher overhead than does
+smp_store_release(), which still provides the needed ordering of "x"
+against "y".  On x86, the version using smp_store_release() might compile
+to a simple load instruction followed by a simple store instruction.
+In contrast, the smp_mb() compiles to an expensive instruction that
+provides the needed ordering.
+
+There is a wide variety of release operations:
+
+o	Store operations, including not only the aforementioned
+	smp_store_release(), but also atomic_set_release(), and
+	atomic_long_set_release().
+
+o	RCU's rcu_assign_pointer() operation.  This is the same as
+	smp_store_release() except that: (1) It takes the pointer to
+	be assigned to instead of a pointer to that pointer, (2) It
+	is intended to be used in conjunction with rcu_dereference()
+	and similar rather than smp_load_acquire(), and (3) It checks
+	for an RCU-protected pointer in "sparse" runs.
+
+o	Value-returning RMW operations whose names end in _release,
+	such as atomic_fetch_add_release() and cmpxchg_release().
+	Note that release ordering is guaranteed only against the
+	memory-store portion of the RMW operation, and not against the
+	memory-load portion.  Note also that conditional operations such
+	as cmpxchg_release() are only guaranteed to provide ordering
+	when they succeed.
+
+As mentioned earlier, release operations are often paired with acquire
+operations, which are the subject of the next section.
+
+
+Acquire Operations
+------------------
+
+Acquire operations include smp_load_acquire(), atomic_read_acquire(),
+and value-returning RMW operations whose names end in _acquire.   These
+operations order their own load against all of the CPU's subsequent
+memory accesses.  Acquire operations often provide improved performance
+and readability compared to explicit barriers.  For example, use of
+smp_load_acquire() saves a line compared to the smp_rmb() example above:
+
+	r0 = smp_load_acquire(&y);
+	r1 = READ_ONCE(x);
+
+As with smp_store_release(), this also makes it easier to connect
+the different pieces of the concurrent algorithm by looking for the
+smp_store_release() that stores to "y".  In addition, smp_load_acquire()
+improves upon smp_rmb() by ordering against subsequent stores as well
+as against subsequent loads.
+
+There are a couple of categories of acquire operations:
+
+o	Load operations, including not only the aforementioned
+	smp_load_acquire(), but also atomic_read_acquire(), and
+	atomic64_read_acquire().
+
+o	Value-returning RMW operations whose names end in _acquire,
+	such as atomic_xchg_acquire() and atomic_cmpxchg_acquire().
+	Note that acquire ordering is guaranteed only against the
+	memory-load portion of the RMW operation, and not against the
+	memory-store portion.  Note also that conditional operations
+	such as atomic_cmpxchg_acquire() are only guaranteed to provide
+	ordering when they succeed.
+
+Symmetry being what it is, acquire operations are often paired with the
+release operations covered earlier.  For example, consider the following
+example, where task0() and task1() execute concurrently:
+
+	void task0(void)
+	{
+		WRITE_ONCE(x, 1);
+		smp_store_release(&y, 1);
+	}
+
+	void task1(void)
+	{
+		r0 = smp_load_acquire(&y);
+		r1 = READ_ONCE(x);
+	}
+
+If "x" and "y" are both initially zero, then either r0's final value
+will be zero or r1's final value will be one, thus providing the required
+ordering.
+
+
+RCU Read-Side Ordering
+----------------------
+
+This category includes read-side markers such as rcu_read_lock()
+and rcu_read_unlock() as well as pointer-traversal primitives such as
+rcu_dereference() and srcu_dereference().
+
+Compared to locking primitives and RMW atomic operations, markers
+for RCU read-side critical sections incur very low overhead because
+they interact only with the corresponding grace-period primitives.
+For example, the rcu_read_lock() and rcu_read_unlock() markers interact
+with synchronize_rcu(), synchronize_rcu_expedited(), and call_rcu().
+The way this works is that if a given call to synchronize_rcu() cannot
+prove that it started before a given call to rcu_read_lock(), then
+that synchronize_rcu() must block until the matching rcu_read_unlock()
+is reached.  For more information, please see the synchronize_rcu()
+docbook header comment and the material in Documentation/RCU.
+
+RCU's pointer-traversal primitives, including rcu_dereference() and
+srcu_dereference(), order their load (which must be a pointer) against any
+of the CPU's subsequent memory accesses whose address has been calculated
+from the value loaded.  There is said to be an *address dependency*
+from the value returned by the rcu_dereference() or srcu_dereference()
+to that subsequent memory access.
+
+A call to rcu_dereference() for a given RCU-protected pointer is
+usually paired with a call to a call to rcu_assign_pointer() for that
+same pointer in much the same way that a call to smp_load_acquire() is
+paired with a call to smp_store_release().  Calls to rcu_dereference()
+and rcu_assign_pointer are often buried in other APIs, for example,
+the RCU list API members defined in include/linux/rculist.h.  For more
+information, please see the docbook headers in that file, the most
+recent LWN article on the RCU API (https://lwn.net/Articles/777036/),
+and of course the material in Documentation/RCU.
+
+If the pointer value is manipulated between the rcu_dereference()
+that returned it and a later dereference(), please read
+Documentation/RCU/rcu_dereference.rst.  It can also be quite helpful to
+review uses in the Linux kernel.
+
+
+Control Dependencies
+--------------------
+
+A control dependency extends from a marked load (READ_ONCE() or stronger)
+through an "if" condition to a marked store (WRITE_ONCE() or stronger)
+that is executed only by one of the legs of that "if" statement.
+Control dependencies are so named because they are mediated by
+control-flow instructions such as comparisons and conditional branches.
+
+In short, you can use a control dependency to enforce ordering between
+an READ_ONCE() and a WRITE_ONCE() when there is an "if" condition
+between them.  The canonical example is as follows:
+
+	q = READ_ONCE(a);
+	if (q)
+		WRITE_ONCE(b, 1);
+
+In this case, all CPUs would see the read from "a" as happening before
+the write to "b".
+
+However, control dependencies are easily destroyed by compiler
+optimizations, so any use of control dependencies must take into account
+all of the compilers used to build the Linux kernel.  Please see the
+"control-dependencies.txt" file for more information.
+
+
+Unordered Accesses
+==================
+
+Each of these two categories of unordered accesses has a section below:
+
+a.	Unordered marked operations.
+
+b.	Unmarked C-language accesses.
+
+
+Unordered Marked Operations
+---------------------------
+
+Unordered operations to different variables are just that, unordered.
+However, if a group of CPUs apply these operations to a single variable,
+all the CPUs will agree on the operation order.  Of course, the ordering
+of unordered marked accesses can also be constrained using the mechanisms
+described earlier in this document.
+
+These operations come in three categories:
+
+o	Marked writes, such as WRITE_ONCE() and atomic_set().  These
+	primitives required the compiler to emit the corresponding store
+	instructions in the expected execution order, thus suppressing
+	a number of destructive optimizations.	However, they provide no
+	hardware ordering guarantees, and in fact many CPUs will happily
+	reorder marked writes with each other or with other unordered
+	operations, unless these operations are to the same variable.
+
+o	Marked reads, such as READ_ONCE() and atomic_read().  These
+	primitives required the compiler to emit the corresponding load
+	instructions in the expected execution order, thus suppressing
+	a number of destructive optimizations.	However, they provide no
+	hardware ordering guarantees, and in fact many CPUs will happily
+	reorder marked reads with each other or with other unordered
+	operations, unless these operations are to the same variable.
+
+o	Unordered RMW atomic operations.  These are non-value-returning
+	RMW atomic operations whose names do not end in _acquire or
+	_release, and also value-returning RMW operations whose names
+	end in _relaxed.  Examples include atomic_add(), atomic_or(),
+	and atomic64_fetch_xor_relaxed().  These operations do carry
+	out the specified RMW operation atomically, for example, five
+	concurrent atomic_inc() operations applied to a given variable
+	will reliably increase the value of that variable by five.
+	However, many CPUs will happily reorder these operations with
+	each other or with other unordered operations.
+
+	This category of operations can be efficiently ordered using
+	smp_mb__before_atomic() and smp_mb__after_atomic(), as was
+	discussed in the "RMW Ordering Augmentation Barriers" section.
+
+In short, these operations can be freely reordered unless they are all
+operating on a single variable or unless they are constrained by one of
+the operations called out earlier in this document.
+
+
+Unmarked C-Language Accesses
+----------------------------
+
+Unmarked C-language accesses are normal variable accesses to normal
+variables, that is, to variables that are not "volatile" and are not
+C11 atomic variables.  These operations provide no ordering guarantees,
+and further do not guarantee "atomic" access.  For example, the compiler
+might (and sometimes does) split a plain C-language store into multiple
+smaller stores.  A load from that same variable running on some other
+CPU while such a store is executing might see a value that is a mashup
+of the old value and the new value.
+
+Unmarked C-language accesses are unordered, and are also subject to
+any number of compiler optimizations, many of which can break your
+concurrent code.  It is possible to used unmarked C-language accesses for
+shared variables that are subject to concurrent access, but great care
+is required on an ongoing basis.  The compiler-constraining barrier()
+primitive can be helpful, as can the various ordering primitives discussed
+in this document.  It nevertheless bears repeating that use of unmarked
+C-language accesses requires careful attention to not just your code,
+but to all the compilers that might be used to build it.  Such compilers
+might replace a series of loads with a single load, and might replace
+a series of stores with a single store.  Some compilers will even split
+a single store into multiple smaller stores.
+
+But there are some ways of using unmarked C-language accesses for shared
+variables without such worries:
+
+o	Guard all accesses to a given variable by a particular lock,
+	so that there are never concurrent conflicting accesses to
+	that variable.	(There are "conflicting accesses" when
+	(1) at least one of the concurrent accesses to a variable is an
+	unmarked C-language access and (2) when at least one of those
+	accesses is a write, whether marked or not.)
+
+o	As above, but using other synchronization primitives such
+	as reader-writer locks or sequence locks.
+
+o	Use locking or other means to ensure that all concurrent accesses
+	to a given variable are reads.
+
+o	Restrict use of a given variable to statistics or heuristics
+	where the occasional bogus value can be tolerated.
+
+o	Declare the accessed variables as C11 atomics.
+	https://lwn.net/Articles/691128/
+
+o	Declare the accessed variables as "volatile".
+
+If you need to live more dangerously, please do take the time to
+understand the compilers.  One place to start is these two LWN
+articles:
+
+Who's afraid of a big bad optimizing compiler?
+	https://lwn.net/Articles/793253
+Calibrating your fear of big bad optimizing compilers
+	https://lwn.net/Articles/799218
+
+Used properly, unmarked C-language accesses can reduce overhead on
+fastpaths.  However, the price is great care and continual attention
+to your compiler as new versions come out and as new optimizations
+are enabled.
-- 
cgit v1.2.3


From 0a27ce6b6968866fa8e3bd70371d67752db7718f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 22 Oct 2020 15:16:08 -0700
Subject: tools/memory-model: Add a glossary of LKMM terms

[ paulmck: Apply Alan Stern feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/glossary.txt | 172 ++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 tools/memory-model/Documentation/glossary.txt

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
new file mode 100644
index 000000000000..79acb75d56ea
--- /dev/null
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -0,0 +1,172 @@
+This document contains brief definitions of LKMM-related terms.  Like most
+glossaries, it is not intended to be read front to back (except perhaps
+as a way of confirming a diagnosis of OCD), but rather to be searched
+for specific terms.
+
+
+Address Dependency:  When the address of a later memory access is computed
+	based on the value returned by an earlier load, an "address
+	dependency" extends from that load extending to the later access.
+	Address dependencies are quite common in RCU read-side critical
+	sections:
+
+	 1 rcu_read_lock();
+	 2 p = rcu_dereference(gp);
+	 3 do_something(p->a);
+	 4 rcu_read_unlock();
+
+	 In this case, because the address of "p->a" on line 3 is computed
+	 from the value returned by the rcu_dereference() on line 2, the
+	 address dependency extends from that rcu_dereference() to that
+	 "p->a".  In rare cases, optimizing compilers can destroy address
+	 dependencies.	Please see Documentation/RCU/rcu_dereference.txt
+	 for more information.
+
+	 See also "Control Dependency" and "Data Dependency".
+
+Acquire:  With respect to a lock, acquiring that lock, for example,
+	using spin_lock().  With respect to a non-lock shared variable,
+	a special operation that includes a load and which orders that
+	load before later memory references running on that same CPU.
+	An example special acquire operation is smp_load_acquire(),
+	but atomic_read_acquire() and atomic_xchg_acquire() also include
+	acquire loads.
+
+	When an acquire load returns the value stored by a release store
+	to that same variable, then all operations preceding that store
+	happen before any operations following that load acquire.
+
+	See also "Relaxed" and "Release".
+
+Coherence (co):  When one CPU's store to a given variable overwrites
+	either the value from another CPU's store or some later value,
+	there is said to be a coherence link from the second CPU to
+	the first.
+
+	It is also possible to have a coherence link within a CPU, which
+	is a "coherence internal" (coi) link.  The term "coherence
+	external" (coe) link is used when it is necessary to exclude
+	the coi case.
+
+	See also "From-reads" and "Reads-from".
+
+Control Dependency:  When a later store's execution depends on a test
+	of a value computed from a value returned by an earlier load,
+	a "control dependency" extends from that load to that store.
+	For example:
+
+	 1 if (READ_ONCE(x))
+	 2   WRITE_ONCE(y, 1);
+
+	 Here, the control dependency extends from the READ_ONCE() on
+	 line 1 to the WRITE_ONCE() on line 2.	Control dependencies are
+	 fragile, and can be easily destroyed by optimizing compilers.
+	 Please see control-dependencies.txt for more information.
+
+	 See also "Address Dependency" and "Data Dependency".
+
+Cycle:	Memory-barrier pairing is restricted to a pair of CPUs, as the
+	name suggests.	And in a great many cases, a pair of CPUs is all
+	that is required.  In other cases, the notion of pairing must be
+	extended to additional CPUs, and the result is called a "cycle".
+	In a cycle, each CPU's ordering interacts with that of the next:
+
+	CPU 0                CPU 1                CPU 2
+	WRITE_ONCE(x, 1);    WRITE_ONCE(y, 1);    WRITE_ONCE(z, 1);
+	smp_mb();            smp_mb();            smp_mb();
+	r0 = READ_ONCE(y);   r1 = READ_ONCE(z);   r2 = READ_ONCE(x);
+
+	CPU 0's smp_mb() interacts with that of CPU 1, which interacts
+	with that of CPU 2, which in turn interacts with that of CPU 0
+	to complete the cycle.	Because of the smp_mb() calls between
+	each pair of memory accesses, the outcome where r0, r1, and r2
+	are all equal to zero is forbidden by LKMM.
+
+	See also "Pairing".
+
+Data Dependency:  When the data written by a later store is computed based
+	on the value returned by an earlier load, a "data dependency"
+	extends from that load to that later store.  For example:
+
+	 1 r1 = READ_ONCE(x);
+	 2 WRITE_ONCE(y, r1 + 1);
+
+	In this case, the data dependency extends from the READ_ONCE()
+	on line 1 to the WRITE_ONCE() on line 2.  Data dependencies are
+	fragile and can be easily destroyed by optimizing compilers.
+	Because optimizing compilers put a great deal of effort into
+	working out what values integer variables might have, this is
+	especially true in cases where the dependency is carried through
+	an integer.
+
+	See also "Address Dependency" and "Control Dependency".
+
+From-Reads (fr):  When one CPU's store to a given variable happened
+	too late to affect the value returned by another CPU's
+	load from that same variable, there is said to be a from-reads
+	link from the load to the store.
+
+	It is also possible to have a from-reads link within a CPU, which
+	is a "from-reads internal" (fri) link.  The term "from-reads
+	external" (fre) link is used when it is necessary to exclude
+	the fri case.
+
+	See also "Coherence" and "Reads-from".
+
+Fully Ordered:  An operation such as smp_mb() that orders all of
+	its CPU's prior accesses with all of that CPU's subsequent
+	accesses, or a marked access such as atomic_add_return()
+	that orders all of its CPU's prior accesses, itself, and
+	all of its CPU's subsequent accesses.
+
+Marked Access:  An access to a variable that uses an special function or
+	macro such as "r1 = READ_ONCE(x)" or "smp_store_release(&a, 1)".
+
+	See also "Unmarked Access".
+
+Pairing: "Memory-barrier pairing" reflects the fact that synchronizing
+	data between two CPUs requires that both CPUs their accesses.
+	Memory barriers thus tend to come in pairs, one executed by
+	one of the CPUs and the other by the other CPU.  Of course,
+	pairing also occurs with other types of operations, so that a
+	smp_store_release() pairs with an smp_load_acquire() that reads
+	the value stored.
+
+	See also "Cycle".
+
+Reads-From (rf):  When one CPU's load returns the value stored by some other
+	CPU, there is said to be a reads-from link from the second
+	CPU's store to the first CPU's load.  Reads-from links have the
+	nice property that time must advance from the store to the load,
+	which means that algorithms using reads-from links can use lighter
+	weight ordering and synchronization compared to algorithms using
+	coherence and from-reads links.
+
+	It is also possible to have a reads-from link within a CPU, which
+	is a "reads-from internal" (rfi) link.	The term "reads-from
+	external" (rfe) link is used when it is necessary to exclude
+	the rfi case.
+
+	See also Coherence" and "From-reads".
+
+Relaxed:  A marked access that does not imply ordering, for example, a
+	READ_ONCE(), WRITE_ONCE(), a non-value-returning read-modify-write
+	operation, or a value-returning read-modify-write operation whose
+	name ends in "_relaxed".
+
+	See also "Acquire" and "Release".
+
+Release:  With respect to a lock, releasing that lock, for example,
+	using spin_unlock().  With respect to a non-lock shared variable,
+	a special operation that includes a store and which orders that
+	store after earlier memory references that ran on that same CPU.
+	An example special release store is smp_store_release(), but
+	atomic_set_release() and atomic_cmpxchg_release() also include
+	release stores.
+
+	See also "Acquire" and "Relaxed".
+
+Unmarked Access:  An access to a variable that uses normal C-language
+	syntax, for example, "a = b[2]";
+
+	See also "Marked Access".
-- 
cgit v1.2.3


From 1947bfcf81a905e84a58b423063e81034a90efed Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:20:56 -0800
Subject: tools/memory-model: Add types to litmus tests

This commit adds type information for global variables in the litmus
tests in order to allow easier use with klitmus7.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoWW+poonceonce.litmus             | 4 +++-
 .../litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus   | 5 ++++-
 .../litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus        | 7 ++++++-
 tools/memory-model/litmus-tests/ISA2+poonceonces.litmus            | 6 +++++-
 .../ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus       | 6 +++++-
 .../litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus            | 5 ++++-
 .../litmus-tests/LB+poacquireonce+pooncerelease.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/LB+poonceonces.litmus              | 5 ++++-
 .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus       | 5 ++++-
 tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus     | 5 +++--
 .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus            | 2 ++
 .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus | 2 ++
 tools/memory-model/litmus-tests/MP+polocks.litmus                  | 6 +++++-
 tools/memory-model/litmus-tests/MP+poonceonces.litmus              | 5 ++++-
 .../litmus-tests/MP+pooncerelease+poacquireonce.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/MP+porevlocks.litmus               | 6 +++++-
 tools/memory-model/litmus-tests/R+fencembonceonces.litmus          | 5 ++++-
 tools/memory-model/litmus-tests/R+poonceonces.litmus               | 5 ++++-
 .../litmus-tests/S+fencewmbonceonce+poacquireonce.litmus           | 5 ++++-
 tools/memory-model/litmus-tests/S+poonceonces.litmus               | 5 ++++-
 tools/memory-model/litmus-tests/SB+fencembonceonces.litmus         | 5 ++++-
 tools/memory-model/litmus-tests/SB+poonceonces.litmus              | 5 ++++-
 tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus  | 5 ++++-
 tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus        | 5 ++++-
 .../litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus    | 5 ++++-
 .../litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus        | 7 ++++++-
 .../litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus        | 7 ++++++-
 .../Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus     | 6 +++++-
 32 files changed, 130 insertions(+), 31 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
index 967f9f2a6226..772544f03fb5 100644
--- a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoRR+poonceonce+Once
  * reads from the same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
index 4635739f3974..5faae98f7ffb 100644
--- a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoRW+poonceonce+Once
  * a given variable and a later write to that same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
index bb068c92d8da..77c9cc9f8dc6 100644
--- a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoWR+poonceonce+Once
  * given variable and a later read from that same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
index 0d9f0a958799..85ef746f511a 100644
--- a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
+++ b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
@@ -7,7 +7,9 @@ C CoWW+poonceonce
  * writes to the same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
index e729d2776e89..87aa900125ab 100644
--- a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
@@ -10,7 +10,10 @@ C IRIW+fencembonceonces+OnceOnce
  * process?  This litmus test exercises LKMM's "propagation" rule.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
index 4b54dd6a6cd9..f84022dca555 100644
--- a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
@@ -10,7 +10,10 @@ C IRIW+poonceonces+OnceOnce
  * different process?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
index 094d58df7789..398f624daa77 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -7,7 +7,12 @@ C ISA2+pooncelock+pooncelock+pombonce
  * (in P0() and P1()) is visible to external process P2().
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
index b321aa6f4ea5..212a432ba16b 100644
--- a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
@@ -9,7 +9,11 @@ C ISA2+poonceonces
  * of the smp_load_acquire() invocations are replaced by READ_ONCE()?
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
index 025b0462ec9b..7afd85672ccd 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
@@ -11,7 +11,11 @@ C ISA2+pooncerelease+poacquirerelease+poacquireonce
  * (AKA non-rf) link, so release-acquire is all that is needed.
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
index 4727f5aaf03b..c8a93c7ee556 100644
--- a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
+++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
@@ -11,7 +11,10 @@ C LB+fencembonceonce+ctrlonceonce
  * another control dependency and order would still be maintained.)
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
index 07b9904b0e49..2fa029568fa1 100644
--- a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
+++ b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
@@ -8,7 +8,10 @@ C LB+poacquireonce+pooncerelease
  * to the other?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+poonceonces.litmus b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
index 74c49cb3c37b..2107306e8625 100644
--- a/tools/memory-model/litmus-tests/LB+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
@@ -7,7 +7,10 @@ C LB+poonceonces
  * be prevented even with no explicit ordering?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index a273da9faa6d..e04b71b0ce2b 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -8,7 +8,10 @@ C MP+fencewmbonceonce+fencermbonceonce
  * is usually better to use smp_store_release() and smp_load_acquire().
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index 97731b4bbdd8..18df682b08b2 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -10,8 +10,9 @@ C MP+onceassign+derefonce
  *)
 
 {
-y=z;
-z=0;
+	int x;
+	int *y=z;
+	int z=0;
 }
 
 P0(int *x, int **y)
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
index 50f4d62bbf0e..b1b1266fb49a 100644
--- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -11,6 +11,8 @@ C MP+polockmbonce+poacquiresilsil
  *)
 
 {
+	spinlock_t lo;
+	int x;
 }
 
 P0(spinlock_t *lo, int *x)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
index abf81e7a0895..867c75d8b960 100644
--- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -11,6 +11,8 @@ C MP+polockonce+poacquiresilsil
  *)
 
 {
+	spinlock_t lo;
+	int x;
 }
 
 P0(spinlock_t *lo, int *x)
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 712a4fcdf6ce..63e0f67c9b9d 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -11,7 +11,11 @@ C MP+polocks
  * to see all prior accesses by those other CPUs.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 172f0145301c..68180a403e5c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -7,7 +7,10 @@ C MP+poonceonces
  * no ordering at all?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index d52c68429722..19f3e6874b50 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -8,7 +8,10 @@ C MP+pooncerelease+poacquireonce
  * pattern.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 72c9276b363e..4ac189adf41e 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -11,7 +11,11 @@ C MP+porevlocks
  * see all prior accesses by those other CPUs.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
index 222a0b850b4a..af9463b39b4a 100644
--- a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
@@ -9,7 +9,10 @@ C R+fencembonceonces
  * cause the resulting test to be allowed.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/R+poonceonces.litmus b/tools/memory-model/litmus-tests/R+poonceonces.litmus
index 5386f128a131..bcd5574e304a 100644
--- a/tools/memory-model/litmus-tests/R+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+poonceonces.litmus
@@ -8,7 +8,10 @@ C R+poonceonces
  * store propagation delays.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
index 18479823cd6c..c36341d1aed6 100644
--- a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
@@ -7,7 +7,10 @@ C S+fencewmbonceonce+poacquireonce
  * store against a subsequent store?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/S+poonceonces.litmus b/tools/memory-model/litmus-tests/S+poonceonces.litmus
index 8c9c2f81a580..7775c23143a0 100644
--- a/tools/memory-model/litmus-tests/S+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/S+poonceonces.litmus
@@ -9,7 +9,10 @@ C S+poonceonces
  * READ_ONCE(), is ordering preserved?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
index ed5fff18d223..833cdfeb7c09 100644
--- a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
@@ -9,7 +9,10 @@ C SB+fencembonceonces
  * suffice, but not much else.)
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+poonceonces.litmus b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
index 10d550730b25..c92211ecbfdf 100644
--- a/tools/memory-model/litmus-tests/SB+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
@@ -8,7 +8,10 @@ C SB+poonceonces
  * variable that the preceding process reads.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
index 04a16603660b..84344b455eb7 100644
--- a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
@@ -6,7 +6,10 @@ C SB+rfionceonce-poonceonces
  * This litmus test demonstrates that LKMM is not fully multicopy atomic.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
index 6a2bc12a1af1..431494708611 100644
--- a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
@@ -8,7 +8,10 @@ C WRC+poonceonces+Once
  * test has no ordering at all.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
index e9947250d7de..554999c64db5 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
@@ -10,7 +10,10 @@ C WRC+pooncerelease+fencermbonceonce+Once
  * is A-cumulative in LKMM.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
index 415248fb6699..265a95ffef13 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
@@ -9,7 +9,12 @@ C Z6.0+pooncelock+poonceLock+pombonce
  * by CPUs not holding that lock.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
index 10a2aa04cd07..0c9aea8e80df 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
@@ -8,7 +8,12 @@ C Z6.0+pooncelock+pooncelock+pombonce
  * seen as ordered by a third process not holding that lock.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
index 88e70b87a683..661f9aaa5791 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
@@ -14,7 +14,11 @@ C Z6.0+pooncerelease+poacquirerelease+fencembonceonce
  * involving locking.)
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
-- 
cgit v1.2.3


From acc4bdc55dcb7d7fe0be736999572a55e121873f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:30:11 -0800
Subject: tools/memory-model: Use "buf" and "flag" for message-passing tests

The use of "x" and "y" for message-passing tests is fine for people
familiar with memory models and litmus-test nomenclature, but is a bit
obtuse for others.  This commit therefore substitutes "buf" for "x" and
"flag" for "y" for the MP tests.  There are a few special-case MP tests
that use locks and these are unchanged.  There is another MP test that
uses pointers, and this is changed to name the pointer "p".

Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../MP+fencewmbonceonce+fencermbonceonce.litmus          | 16 ++++++++--------
 .../litmus-tests/MP+onceassign+derefonce.litmus          | 12 ++++++------
 tools/memory-model/litmus-tests/MP+polocks.litmus        | 16 ++++++++--------
 tools/memory-model/litmus-tests/MP+poonceonces.litmus    | 16 ++++++++--------
 .../litmus-tests/MP+pooncerelease+poacquireonce.litmus   | 16 ++++++++--------
 tools/memory-model/litmus-tests/MP+porevlocks.litmus     | 16 ++++++++--------
 6 files changed, 46 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index e04b71b0ce2b..f15e501849dd 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -9,25 +9,25 @@ C MP+fencewmbonceonce+fencermbonceonce
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	smp_wmb();
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	smp_rmb();
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index 18df682b08b2..ed8ee9bde0c9 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -10,24 +10,24 @@ C MP+onceassign+derefonce
  *)
 
 {
+	int *p=y;
 	int x;
-	int *y=z;
-	int z=0;
+	int y=0;
 }
 
-P0(int *x, int **y)
+P0(int *x, int **p)
 {
 	WRITE_ONCE(*x, 1);
-	rcu_assign_pointer(*y, x);
+	rcu_assign_pointer(*p, x);
 }
 
-P1(int *x, int **y)
+P1(int *x, int **p)
 {
 	int *r0;
 	int r1;
 
 	rcu_read_lock();
-	r0 = rcu_dereference(*y);
+	r0 = rcu_dereference(*p);
 	r1 = READ_ONCE(*r0);
 	rcu_read_unlock();
 }
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 63e0f67c9b9d..4b0c2edcc029 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -13,27 +13,27 @@ C MP+polocks
 
 {
 	spinlock_t mylock;
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock)
 {
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	spin_lock(mylock);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 	spin_unlock(mylock);
 }
 
-P1(int *x, int *y, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock)
 {
 	int r0;
 	int r1;
 
 	spin_lock(mylock);
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	spin_unlock(mylock);
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 68180a403e5c..3010bbaec46c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -8,23 +8,23 @@ C MP+poonceonces
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*buf, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
-	r1 = READ_ONCE(*x);
+	r0 = READ_ONCE(*flag);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index 19f3e6874b50..21e825d5dea6 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -9,23 +9,23 @@ C MP+pooncerelease+poacquireonce
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
-	smp_store_release(y, 1);
+	WRITE_ONCE(*buf, 1);
+	smp_store_release(flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = smp_load_acquire(y);
-	r1 = READ_ONCE(*x);
+	r0 = smp_load_acquire(flag);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 4ac189adf41e..9691d55b4e21 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -13,27 +13,27 @@ C MP+porevlocks
 
 {
 	spinlock_t mylock;
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	spin_lock(mylock);
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 	spin_unlock(mylock);
 }
 
-P1(int *x, int *y, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock)
 {
 	spin_lock(mylock);
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	spin_unlock(mylock);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
 exists (0:r0=1 /\ 0:r1=0)
-- 
cgit v1.2.3


From b6ff30849ca723b78306514246b98ca5645d92f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:39:28 -0800
Subject: tools/memory-model: Label MP tests' producers and consumers

This commit adds comments that label the MP tests' producer and consumer
processes, and also that label the "exists" clause as the bad outcome.

Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus        | 6 +++---
 tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus      | 6 +++---
 .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus             | 6 +++---
 .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus  | 6 +++---
 tools/memory-model/litmus-tests/MP+polocks.litmus                   | 6 +++---
 tools/memory-model/litmus-tests/MP+poonceonces.litmus               | 6 +++---
 .../memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus | 6 +++---
 tools/memory-model/litmus-tests/MP+porevlocks.litmus                | 6 +++---
 8 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index f15e501849dd..c5c168d92973 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -13,14 +13,14 @@ C MP+fencewmbonceonce+fencermbonceonce
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	smp_wmb();
 	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -30,4 +30,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index ed8ee9bde0c9..20ff62649f1e 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -15,13 +15,13 @@ C MP+onceassign+derefonce
 	int y=0;
 }
 
-P0(int *x, int **p)
+P0(int *x, int **p) // Producer
 {
 	WRITE_ONCE(*x, 1);
 	rcu_assign_pointer(*p, x);
 }
 
-P1(int *x, int **p)
+P1(int *x, int **p) // Consumer
 {
 	int *r0;
 	int r1;
@@ -32,4 +32,4 @@ P1(int *x, int **p)
 	rcu_read_unlock();
 }
 
-exists (1:r0=x /\ 1:r1=0)
+exists (1:r0=x /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
index b1b1266fb49a..153917ad5dc9 100644
--- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -15,7 +15,7 @@ C MP+polockmbonce+poacquiresilsil
 	int x;
 }
 
-P0(spinlock_t *lo, int *x)
+P0(spinlock_t *lo, int *x) // Producer
 {
 	spin_lock(lo);
 	smp_mb__after_spinlock();
@@ -23,7 +23,7 @@ P0(spinlock_t *lo, int *x)
 	spin_unlock(lo);
 }
 
-P1(spinlock_t *lo, int *x)
+P1(spinlock_t *lo, int *x) // Consumer
 {
 	int r1;
 	int r2;
@@ -34,4 +34,4 @@ P1(spinlock_t *lo, int *x)
 	r3 = spin_is_locked(lo);
 }
 
-exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
index 867c75d8b960..aad64397bb8c 100644
--- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -15,14 +15,14 @@ C MP+polockonce+poacquiresilsil
 	int x;
 }
 
-P0(spinlock_t *lo, int *x)
+P0(spinlock_t *lo, int *x) // Producer
 {
 	spin_lock(lo);
 	WRITE_ONCE(*x, 1);
 	spin_unlock(lo);
 }
 
-P1(spinlock_t *lo, int *x)
+P1(spinlock_t *lo, int *x) // Consumer
 {
 	int r1;
 	int r2;
@@ -33,4 +33,4 @@ P1(spinlock_t *lo, int *x)
 	r3 = spin_is_locked(lo);
 }
 
-exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 4b0c2edcc029..21cbca6f3be4 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -17,7 +17,7 @@ C MP+polocks
 	int flag;
 }
 
-P0(int *buf, int *flag, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	spin_lock(mylock);
@@ -25,7 +25,7 @@ P0(int *buf, int *flag, spinlock_t *mylock)
 	spin_unlock(mylock);
 }
 
-P1(int *buf, int *flag, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock) // Consumer
 {
 	int r0;
 	int r1;
@@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 3010bbaec46c..9f9769d647c7 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -12,13 +12,13 @@ C MP+poonceonces
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -27,4 +27,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index 21e825d5dea6..cbe28e733443 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -13,13 +13,13 @@ C MP+pooncerelease+poacquireonce
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	smp_store_release(flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -28,4 +28,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 9691d55b4e21..012041bd4feb 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -17,7 +17,7 @@ C MP+porevlocks
 	int flag;
 }
 
-P0(int *buf, int *flag, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock) // Consumer
 {
 	int r0;
 	int r1;
@@ -28,7 +28,7 @@ P0(int *buf, int *flag, spinlock_t *mylock)
 	spin_unlock(mylock);
 }
 
-P1(int *buf, int *flag, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock) // Producer
 {
 	spin_lock(mylock);
 	WRITE_ONCE(*buf, 1);
@@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock)
 	WRITE_ONCE(*flag, 1);
 }
 
-exists (0:r0=1 /\ 0:r1=0)
+exists (0:r0=1 /\ 0:r1=0) (* Bad outcome. *)
-- 
cgit v1.2.3


From f52b8fd332573106e60958617a3d2e30611ce1fb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 6 Nov 2020 14:54:02 -0800
Subject: bpf: selftest: Use static globals in tcp_hdr_options and
 btf_skc_cls_ingress

Some globals in the tcp_hdr_options test and btf_skc_cls_ingress test
are not using static scope.  This patch fixes it.

Targeting bpf-next branch as an improvement since it currently does not
break the build.

Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options")
Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201106225402.4135741-1-kafai@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c     | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 86ccf37e26b3..762f6a9da8b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -17,7 +17,7 @@
 #include "test_btf_skc_cls_ingress.skel.h"
 
 static struct test_btf_skc_cls_ingress *skel;
-struct sockaddr_in6 srv_sa6;
+static struct sockaddr_in6 srv_sa6;
 static __u32 duration;
 
 #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index c85174cdcb77..08d19cafd5e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -18,12 +18,12 @@
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-hdr-opt-test"
 
-struct bpf_test_option exp_passive_estab_in;
-struct bpf_test_option exp_active_estab_in;
-struct bpf_test_option exp_passive_fin_in;
-struct bpf_test_option exp_active_fin_in;
-struct hdr_stg exp_passive_hdr_stg;
-struct hdr_stg exp_active_hdr_stg = { .active = true, };
+static struct bpf_test_option exp_passive_estab_in;
+static struct bpf_test_option exp_active_estab_in;
+static struct bpf_test_option exp_passive_fin_in;
+static struct bpf_test_option exp_active_fin_in;
+static struct hdr_stg exp_passive_hdr_stg;
+static struct hdr_stg exp_active_hdr_stg = { .active = true, };
 
 static struct test_misc_tcp_hdr_options *misc_skel;
 static struct test_tcp_hdr_options *skel;
-- 
cgit v1.2.3


From 1db32acfde741359b0b1b9962ae8cd501c2ff769 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Fri, 6 Nov 2020 13:07:41 -0500
Subject: selftests/net: test max_num_members, fanout_args in psock_fanout

Add an additional control test that verifies:
-specifying two different max_num_members values fails
-specifying max_num_members > PACKET_FANOUT_MAX fails

In datapath tests, set max_num_members to PACKET_FANOUT_MAX.

Signed-off-by: Tanner Love <tannerlove@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/psock_fanout.c | 72 ++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 2c522f7a0aec..db4521335722 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -56,12 +56,15 @@
 
 #define RING_NUM_FRAMES			20
 
+static uint32_t cfg_max_num_members;
+
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
 	struct sockaddr_ll addr = {0};
-	int fd, val;
+	struct fanout_args args;
+	int fd, val, err;
 
 	fd = socket(PF_PACKET, SOCK_RAW, 0);
 	if (fd < 0) {
@@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 		exit(1);
 	}
 
-	val = (((int) typeflags) << 16) | group_id;
-	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+	if (cfg_max_num_members) {
+		args.id = group_id;
+		args.type_flags = typeflags;
+		args.max_num_members = cfg_max_num_members;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
+				 sizeof(args));
+	} else {
+		val = (((int) typeflags) << 16) | group_id;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
+				 sizeof(val));
+	}
+	if (err) {
 		if (close(fd)) {
 			perror("close packet");
 			exit(1);
@@ -286,6 +299,56 @@ static void test_control_group(void)
 	}
 }
 
+/* Test illegal max_num_members values */
+static void test_control_group_max_num_members(void)
+{
+	int fds[3];
+
+	fprintf(stderr, "test: control multiple sockets, max_num_members\n");
+
+	/* expected failure on greater than PACKET_FANOUT_MAX */
+	cfg_max_num_members = (1 << 16) + 1;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
+		exit(1);
+	}
+
+	cfg_max_num_members = 256;
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+
+	/* expected failure on joining group with different max_num_members */
+	cfg_max_num_members = 257;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: set different max_num_members\n");
+		exit(1);
+	}
+
+	/* success on joining group with same max_num_members */
+	cfg_max_num_members = 256;
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	/* success on joining group with max_num_members unspecified */
+	cfg_max_num_members = 0;
+	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[2] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
 /* Test creating a unique fanout group ids */
 static void test_unique_fanout_group_ids(void)
 {
@@ -426,8 +489,11 @@ int main(int argc, char **argv)
 
 	test_control_single();
 	test_control_group();
+	test_control_group_max_num_members();
 	test_unique_fanout_group_ids();
 
+	/* PACKET_FANOUT_MAX */
+	cfg_max_num_members = 1 << 16;
 	/* find a set of ports that do not collide onto the same socket */
 	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
 			    expect_hash[0], expect_hash[1]);
-- 
cgit v1.2.3


From ff2c395b9257f0e617f9cd212893f3c72c80ee6c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:40 +1100
Subject: selftests/gpio: Use TEST_GEN_PROGS_EXTENDED

Use TEST_GEN_PROGS_EXTENDED rather than TEST_PROGS_EXTENDED.

That tells the lib.mk logic that the files it references are to be
generated by the Makefile.

Having done that we don't need to override the all rule.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 32bdc978a711..c85fb5acf5f4 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -11,22 +11,20 @@ LDLIBS += $(VAR_LDLIBS)
 
 TEST_PROGS := gpio-mockup.sh
 TEST_FILES := gpio-mockup-sysfs.sh
-TEST_PROGS_EXTENDED := gpio-mockup-chardev
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
 
 GPIODIR := $(realpath ../../../gpio)
 GPIOOBJ := gpio-utils.o
 
-all: $(TEST_PROGS_EXTENDED)
-
 override define CLEAN
-	$(RM) $(TEST_PROGS_EXTENDED)
+	$(RM) $(TEST_GEN_PROGS_EXTENDED)
 	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
+$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
 $(GPIODIR)/$(GPIOOBJ):
 	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
-- 
cgit v1.2.3


From 449539da2e237336bc750b41f1736a77f9aca25c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:41 +1100
Subject: selftests/gpio: Move include of lib.mk up

Move the include of lib.mk up so that in a subsequent patch we can use
OUTPUT, which is initialised by lib.mk, in the definition of the GPIO
variables.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index c85fb5acf5f4..615c8a953ade 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -13,6 +13,9 @@ TEST_PROGS := gpio-mockup.sh
 TEST_FILES := gpio-mockup-sysfs.sh
 TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
 
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
 GPIODIR := $(realpath ../../../gpio)
 GPIOOBJ := gpio-utils.o
 
@@ -21,9 +24,6 @@ override define CLEAN
 	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
-KSFT_KHDR_INSTALL := 1
-include ../lib.mk
-
 $(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
 $(GPIODIR)/$(GPIOOBJ):
-- 
cgit v1.2.3


From b68c1c65dec5fb5186ebd33ce52059b4c6db8500 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:42 +1100
Subject: selftests/gpio: Fix build when source tree is read only

Currently the gpio selftests fail to build if the source tree is read
only:

  make -j 160 -C tools/testing/selftests TARGETS=gpio
  make[1]: Entering directory '/linux/tools/testing/selftests/gpio'
  make OUTPUT=/linux/tools/gpio/ -C /linux/tools/gpio
  make[2]: Entering directory '/linux/tools/gpio'
  mkdir -p /linux/tools/gpio/include/linux 2>&1 || true
  ln -sf /linux/tools/gpio/../../include/uapi/linux/gpio.h /linux/tools/gpio/include/linux/gpio.h
  ln: failed to create symbolic link '/linux/tools/gpio/include/linux/gpio.h': Read-only file system

This happens because we ask make to build ../../../gpio (tools/gpio)
without pointing OUTPUT away from the source directory.

To fix it we create a subdirectory of the existing OUTPUT directory,
called tools-gpio, and tell tools/gpio to build in there.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 615c8a953ade..acf4088a9891 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -17,14 +17,18 @@ KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
 GPIODIR := $(realpath ../../../gpio)
-GPIOOBJ := gpio-utils.o
+GPIOOUT := $(OUTPUT)/tools-gpio/
+GPIOOBJ := $(GPIOOUT)/gpio-utils.o
 
 override define CLEAN
 	$(RM) $(TEST_GEN_PROGS_EXTENDED)
-	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
+	$(RM) -rf $(GPIOOUT)
 endef
 
-$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
+$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
 
-$(GPIODIR)/$(GPIOOBJ):
-	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
+$(GPIOOUT):
+	mkdir -p $@
+
+$(GPIOOBJ): $(GPIOOUT)
+	$(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR)
-- 
cgit v1.2.3


From 85128c5bcdf9bd9b574d7cbafa49170a39fed2e1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:43 +1100
Subject: selftests/gpio: Add to CLEAN rule rather than overriding

Rather than overriding the CLEAN rule we can just append to it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index acf4088a9891..41582fe485ee 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -20,10 +20,7 @@ GPIODIR := $(realpath ../../../gpio)
 GPIOOUT := $(OUTPUT)/tools-gpio/
 GPIOOBJ := $(GPIOOUT)/gpio-utils.o
 
-override define CLEAN
-	$(RM) $(TEST_GEN_PROGS_EXTENDED)
-	$(RM) -rf $(GPIOOUT)
-endef
+CLEAN += ; $(RM) -rf $(GPIOOUT)
 
 $(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
 
-- 
cgit v1.2.3


From fc4a3a1bf9ad799181e4d4ec9c2598c0405bc27d Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Mon, 2 Nov 2020 09:39:18 +0200
Subject: selftests: intel_pstate: ftime() is deprecated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use clock_gettime() instead of deprecated ftime().

  aperf.c: In function ‘main’:
  aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations]
     58 |  ftime(&before);
        |  ^~~~~
  In file included from aperf.c:9:
  /usr/include/sys/timeb.h:39:12: note: declared here
     39 | extern int ftime (struct timeb *__timebuf)
        |            ^~~~~

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/intel_pstate/aperf.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index f6cd03a87493..a8acf3996973 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -10,8 +10,12 @@
 #include <sched.h>
 #include <errno.h>
 #include <string.h>
+#include <time.h>
 #include "../kselftest.h"
 
+#define MSEC_PER_SEC	1000L
+#define NSEC_PER_MSEC	1000000L
+
 void usage(char *name) {
 	printf ("Usage: %s cpunum\n", name);
 }
@@ -22,7 +26,7 @@ int main(int argc, char **argv) {
 	long long tsc, old_tsc, new_tsc;
 	long long aperf, old_aperf, new_aperf;
 	long long mperf, old_mperf, new_mperf;
-	struct timeb before, after;
+	struct timespec before, after;
 	long long int start, finish, total;
 	cpu_set_t cpuset;
 
@@ -55,7 +59,10 @@ int main(int argc, char **argv) {
 		return 1;
 	}
 
-	ftime(&before);
+	if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) {
+		perror("clock_gettime");
+		return 1;
+	}
 	pread(fd, &old_tsc,  sizeof(old_tsc), 0x10);
 	pread(fd, &old_aperf,  sizeof(old_mperf), 0xe7);
 	pread(fd, &old_mperf,  sizeof(old_aperf), 0xe8);
@@ -64,7 +71,10 @@ int main(int argc, char **argv) {
 		sqrt(i);
 	}
 
-	ftime(&after);
+	if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) {
+		perror("clock_gettime");
+		return 1;
+	}
 	pread(fd, &new_tsc,  sizeof(new_tsc), 0x10);
 	pread(fd, &new_aperf,  sizeof(new_mperf), 0xe7);
 	pread(fd, &new_mperf,  sizeof(new_aperf), 0xe8);
@@ -73,11 +83,11 @@ int main(int argc, char **argv) {
 	aperf = new_aperf-old_aperf;
 	mperf = new_mperf-old_mperf;
 
-	start = before.time*1000 + before.millitm;
-	finish = after.time*1000 + after.millitm;
+	start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC;
+	finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC;
 	total = finish - start;
 
-	printf("runTime: %4.2f\n", 1.0*total/1000);
+	printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC);
 	printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
 	return 0;
 }
-- 
cgit v1.2.3


From 1c49e3783f8899555190a49024ac86d3d76633cd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:03:05 +1100
Subject: selftests/memfd: Fix implicit declaration warnings

The memfd tests emit several warnings:

  fuse_test.c:261:7: warning: implicit declaration of function 'open'
  fuse_test.c:67:6: warning: implicit declaration of function 'fcntl'
  memfd_test.c:397:6: warning: implicit declaration of function 'fallocate'
  memfd_test.c:64:7: warning: implicit declaration of function 'open'
  memfd_test.c:90:6: warning: implicit declaration of function 'fcntl'

These are all caused by the test not including fcntl.h.

Instead of including linux/fcntl.h, include fcntl.h, which should
eventually cause the former to be included as well.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/memfd/fuse_test.c  | 2 +-
 tools/testing/selftests/memfd/memfd_test.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index b018e835737d..be675002f918 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
 #include <linux/memfd.h>
 #include <sched.h>
 #include <stdio.h>
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 334a7eea2004..74baab83fec3 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -6,7 +6,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
 #include <linux/memfd.h>
 #include <sched.h>
 #include <stdio.h>
-- 
cgit v1.2.3


From 82f147944c650a07831c796c398f5c973dbdde79 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Sat, 7 Nov 2020 17:19:35 +0800
Subject: tool: selftests: fix spelling typo of 'writting'

writting -> writing

Signed-off-by: Wang Qing <wangqing@vivo.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9b0912a01777..9132fae7ad65 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -894,7 +894,7 @@ static int faulting_process(int signal_test)
 				count_verify[nr]);
 	        }
 		/*
-		 * Trigger write protection if there is by writting
+		 * Trigger write protection if there is by writing
 		 * the same value back.
 		 */
 		*area_count(area_dst, nr) = count;
@@ -922,7 +922,7 @@ static int faulting_process(int signal_test)
 				count_verify[nr]); exit(1);
 		}
 		/*
-		 * Trigger write protection if there is by writting
+		 * Trigger write protection if there is by writing
 		 * the same value back.
 		 */
 		*area_count(area_dst, nr) = count;
-- 
cgit v1.2.3


From 93f20eff0cca972d74cb554a2e8b47730228be16 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 28 Oct 2020 16:31:14 +0800
Subject: selftests/run_kselftest.sh: fix dry-run typo

Should be -d instead of -n for dry-run.

Fixes: 5da1918446a1 ("selftests/run_kselftest.sh: Make each test individually selectable")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/run_kselftest.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 609a4ef9300e..97165a83df63 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -48,7 +48,7 @@ while true; do
 		-l | --list)
 			echo "$available"
 			exit 0 ;;
-		-n | --dry-run)
+		-d | --dry-run)
 			dryrun="echo"
 			shift ;;
 		-h | --help)
-- 
cgit v1.2.3


From c2e46f6b3e3551558d44c4dc518b9667cb0d5f8b Mon Sep 17 00:00:00 2001
From: Sachin Sant <sachinp@linux.vnet.ibm.com>
Date: Fri, 6 Nov 2020 13:10:06 +0530
Subject: selftests/cgroup: Fix build on older distros

On older distros struct clone_args does not have a cgroup member,
leading to build errors:

 cgroup_util.c: In function 'clone_into_cgroup':
 cgroup_util.c:343:4: error: 'struct clone_args' has no member named 'cgroup'
 cgroup_util.c:346:33: error: invalid application of 'sizeof' to incomplete
  type 'struct clone_args'

But the selftests already have a locally defined version of the
structure which is up to date, called __clone_args.

So use __clone_args which fixes the error.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sachin Sant <sachinp@linux.vnet.ibm.com>>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 05853b0b8831..027014662fb2 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -337,13 +337,13 @@ pid_t clone_into_cgroup(int cgroup_fd)
 #ifdef CLONE_ARGS_SIZE_VER2
 	pid_t pid;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_INTO_CGROUP,
 		.exit_signal = SIGCHLD,
 		.cgroup = cgroup_fd,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(struct __clone_args));
 	/*
 	 * Verify that this is a genuine test failure:
 	 * ENOSYS -> clone3() not available
-- 
cgit v1.2.3


From 1ccd58331f6f2af73758e572f8aa0215b0cacc0e Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 7 Nov 2020 17:47:17 +0100
Subject: selftests: disable rp_filter when testing bareudp

Some systems have rp_filter=1 as default configuration. This breaks
bareudp.sh as the intermediate namespaces handle part of the routing
with regular IPv4 routes but the reverse path is done with tc
(flower/tunnel_key/mirred).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/28140b7d20161e4f766b558018fe2718f9bc1117.1604767577.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/bareudp.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index c6fe22de7d0e..c2b9e990e544 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -234,6 +234,12 @@ setup_overlay_ipv4()
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
 	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
 	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+
+	# The intermediate namespaces don't have routes for the reverse path,
+	# as it will be handled by tc. So we need to ensure that rp_filter is
+	# not going to block the traffic.
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
 
 setup_overlay_ipv6()
-- 
cgit v1.2.3


From 5329722057d41aebc31e391907a501feaa42f7d9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 9 Nov 2020 17:19:29 -0800
Subject: bpf: Assign ID to vmlinux BTF and return extra info for BTF in
 GET_OBJ_INFO

Allocate ID for vmlinux BTF. This makes it visible when iterating over all BTF
objects in the system. To allow distinguishing vmlinux BTF (and later kernel
module BTF) from user-provided BTFs, expose extra kernel_btf flag, as well as
BTF name ("vmlinux" for vmlinux BTF, will equal to module's name for module
BTF).  We might want to later allow specifying BTF name for user-provided BTFs
as well, if that makes sense. But currently this is reserved only for
in-kernel BTFs.

Having in-kernel BTFs exposed IDs will allow to extend BPF APIs that require
in-kernel BTF type with ability to specify BTF types from kernel modules, not
just vmlinux BTF. This will be implemented in a follow up patch set for
fentry/fexit/fmod_ret/lsm/etc.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201110011932.3201430-3-andrii@kernel.org
---
 tools/include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9879d6793e90..162999b12790 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4466,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
-- 
cgit v1.2.3


From cecaf4a0f2dcbd7e76156f6d63748b84c176b95b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 9 Nov 2020 17:19:32 -0800
Subject: tools/bpftool: Add support for in-kernel and named BTF in `btf show`

Display vmlinux BTF name and kernel module names when listing available BTFs
on the system.

In human-readable output mode, module BTFs are reported with "name
[module-name]", while vmlinux BTF will be reported as "name [vmlinux]".
Square brackets are added by bpftool and follow kernel convention when
displaying modules in human-readable text outputs.

[vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s
1: name [vmlinux]  size 4082281B
6: size 2365B  prog_ids 8,6  map_ids 3
7: name [button]  size 46895B
8: name [pcspkr]  size 42328B
9: name [serio_raw]  size 39375B
10: name [floppy]  size 57185B
11: name [i2c_core]  size 76186B
12: name [crc32c_intel]  size 16036B
13: name [i2c_piix4]  size 50497B
14: name [irqbypass]  size 14124B
15: name [kvm]  size 197985B
16: name [kvm_intel]  size 123564B
17: name [cryptd]  size 42466B
18: name [crypto_simd]  size 17187B
19: name [glue_helper]  size 39205B
20: name [aesni_intel]  size 41034B
25: size 36150B
        pids bpftool(2519)

In JSON mode, two fields (boolean "kernel" and string "name") are reported for
each BTF object. vmlinux BTF is reported with name "vmlinux" (kernel itself
returns and empty name for vmlinux BTF).

[vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s -jp
[{
        "id": 1,
        "size": 4082281,
        "prog_ids": [],
        "map_ids": [],
        "kernel": true,
        "name": "vmlinux"
    },{
        "id": 6,
        "size": 2365,
        "prog_ids": [8,6
        ],
        "map_ids": [3
        ],
        "kernel": false
    },{
        "id": 7,
        "size": 46895,
        "prog_ids": [],
        "map_ids": [],
        "kernel": true,
        "name": "button"
    },{

...

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/20201110011932.3201430-6-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index c96b56e8e3a4..ed5e97157241 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -742,9 +742,14 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 	       struct btf_attach_table *btf_map_table)
 {
 	struct btf_attach_point *obj;
+	const char *name = u64_to_ptr(info->name);
 	int n;
 
 	printf("%u: ", info->id);
+	if (info->kernel_btf)
+		printf("name [%s]  ", name);
+	else if (name && name[0])
+		printf("name %s  ", name);
 	printf("size %uB", info->btf_size);
 
 	n = 0;
@@ -771,6 +776,7 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 	      struct btf_attach_table *btf_map_table)
 {
 	struct btf_attach_point *obj;
+	const char *name = u64_to_ptr(info->name);
 
 	jsonw_start_object(json_wtr);	/* btf object */
 	jsonw_uint_field(json_wtr, "id", info->id);
@@ -796,6 +802,11 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 
 	emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
 
+	jsonw_bool_field(json_wtr, "kernel", info->kernel_btf);
+
+	if (name && name[0])
+		jsonw_string_field(json_wtr, "name", name);
+
 	jsonw_end_object(json_wtr);	/* btf object */
 }
 
@@ -803,15 +814,30 @@ static int
 show_btf(int fd, struct btf_attach_table *btf_prog_table,
 	 struct btf_attach_table *btf_map_table)
 {
-	struct bpf_btf_info info = {};
+	struct bpf_btf_info info;
 	__u32 len = sizeof(info);
+	char name[64];
 	int err;
 
+	memset(&info, 0, sizeof(info));
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	if (err) {
 		p_err("can't get BTF object info: %s", strerror(errno));
 		return -1;
 	}
+	/* if kernel support emitting BTF object name, pass name pointer */
+	if (info.name_len) {
+		memset(&info, 0, sizeof(info));
+		info.name_len = sizeof(name);
+		info.name = ptr_to_u64(name);
+		len = sizeof(info);
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get BTF object info: %s", strerror(errno));
+			return -1;
+		}
+	}
 
 	if (json_output)
 		show_btf_json(&info, fd, btf_prog_table, btf_map_table);
-- 
cgit v1.2.3


From 58cfa49c2ba7f815adccc27a775e7cf8a8f7f539 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Nov 2020 09:50:12 +0800
Subject: selftest/bpf: Add missed ip6ip6 test back

In comment 173ca26e9b51 ("samples/bpf: add comprehensive ipip, ipip6,
ip6ip6 test") we added ip6ip6 test for bpf tunnel testing. But in commit
933a741e3b82 ("selftests/bpf: bpf tunnel test.") when we moved it to
the current folder, we didn't add it.

This patch add the ip6ip6 test back to bpf tunnel test. Update the ipip6's
topology for both IPv4 and IPv6 testing. Since iperf test is removed as
currect framework simplified it in purpose, I also removed unused tcp
checkings in test_tunnel_kern.c.

Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201110015013.1570716-2-liuhangbin@gmail.com
---
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 42 ++++-----------------
 tools/testing/selftests/bpf/test_tunnel.sh         | 43 ++++++++++++++++++++--
 2 files changed, 46 insertions(+), 39 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index f48dbfe24ddc..a621b58ab079 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -15,7 +15,6 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
-#include <linux/tcp.h>
 #include <linux/socket.h>
 #include <linux/pkt_cls.h>
 #include <linux/erspan.h>
@@ -528,12 +527,11 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
@@ -541,16 +539,6 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	key.tunnel_ttl = 64;
 	if (iph->protocol == IPPROTO_ICMP) {
 		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-	} else {
-		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
-			return TC_ACT_SHOT;
-
-		if (tcp->dest == bpf_htons(5200))
-			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-		else if (tcp->dest == bpf_htons(5201))
-			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
-		else
-			return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
@@ -585,19 +573,20 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
-	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
@@ -634,35 +623,18 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct ipv6hdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
-	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
 	key.tunnel_ttl = 64;
-
 	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
-		key.remote_ipv6[3] = bpf_htonl(1);
-	} else {
-		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
-			ERROR(iph->nexthdr);
-			return TC_ACT_SHOT;
-		}
-
-		if (tcp->dest == bpf_htons(5200)) {
-			key.remote_ipv6[3] = bpf_htonl(1);
-		} else if (tcp->dest == bpf_htons(5201)) {
-			key.remote_ipv6[3] = bpf_htonl(2);
-		} else {
-			ERROR(tcp->dest);
-			return TC_ACT_SHOT;
-		}
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index bd12ec97a44d..1ccbe804e8e1 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -24,12 +24,12 @@
 # Root namespace with metadata-mode tunnel + BPF
 # Device names and addresses:
 # 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
 #
 # Namespace at_ns0 with native tunnel
 # Device names and addresses:
 # 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
 #
 #
 # End-to-end ping packet flow
@@ -250,7 +250,7 @@ add_ipip_tunnel()
 	ip addr add dev $DEV 10.1.1.200/24
 }
 
-add_ipip6tnl_tunnel()
+add_ip6tnl_tunnel()
 {
 	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
 	ip netns exec at_ns0 ip link set dev veth0 up
@@ -262,11 +262,13 @@ add_ipip6tnl_tunnel()
 		ip link add dev $DEV_NS type $TYPE \
 		local ::11 remote ::22
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
 	ip netns exec at_ns0 ip link set dev $DEV_NS up
 
 	# root namespace
 	ip link add dev $DEV type $TYPE external
 	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV 1::22/96
 	ip link set dev $DEV up
 }
 
@@ -534,7 +536,7 @@ test_ipip6()
 
 	check $TYPE
 	config_device
-	add_ipip6tnl_tunnel
+	add_ip6tnl_tunnel
 	ip link set dev veth1 mtu 1500
 	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
 	# underlay
@@ -553,6 +555,34 @@ test_ipip6()
         echo -e ${GREEN}"PASS: $TYPE"${NC}
 }
 
+test_ip6ip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ip6ip6tnl00
+	DEV=ip6ip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip6 over ip6
+	ping6 $PING_ARG 1::11
+	check_err $?
+	ip netns exec at_ns0 ping6 $PING_ARG 1::22
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
 setup_xfrm_tunnel()
 {
 	auth=0x$(printf '1%.0s' {1..40})
@@ -646,6 +676,7 @@ cleanup()
 	ip link del veth1 2> /dev/null
 	ip link del ipip11 2> /dev/null
 	ip link del ipip6tnl11 2> /dev/null
+	ip link del ip6ip6tnl11 2> /dev/null
 	ip link del gretap11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
@@ -742,6 +773,10 @@ bpf_tunnel_test()
 	test_ipip6
 	errors=$(( $errors + $? ))
 
+	echo "Testing IP6IP6 tunnel..."
+	test_ip6ip6
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPSec tunnel..."
 	test_xfrm_tunnel
 	errors=$(( $errors + $? ))
-- 
cgit v1.2.3


From c8a950d0d3b926a02c7b2e713850d38217cec3d1 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:05 +0100
Subject: tools: Factor HOSTCC, HOSTLD, HOSTAR definitions

Several Makefiles in tools/ need to define the host toolchain variables.
Move their definition to tools/scripts/Makefile.include

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-2-jean-philippe@linaro.org
---
 tools/bpf/resolve_btfids/Makefile |  9 ---------
 tools/build/Makefile              |  4 ----
 tools/objtool/Makefile            |  9 ---------
 tools/perf/Makefile.perf          |  4 ----
 tools/power/acpi/Makefile.config  |  1 -
 tools/scripts/Makefile.include    | 10 ++++++++++
 6 files changed, 10 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 66cb92136de4..bf656432ad73 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -18,15 +18,6 @@ else
 endif
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR  ?= llvm-ar
-HOSTCC  ?= clang
-HOSTLD  ?= ld.lld
-else
-HOSTAR  ?= ar
-HOSTCC  ?= gcc
-HOSTLD  ?= ld
-endif
 AR       = $(HOSTAR)
 CC       = $(HOSTCC)
 LD       = $(HOSTLD)
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 722f1700d96a..bae48e6fa995 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -15,10 +15,6 @@ endef
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 export HOSTCC HOSTLD HOSTAR
 
 ifeq ($(V),1)
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 4ea9a833dde7..5cdb19036d7f 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -3,15 +3,6 @@ include ../scripts/Makefile.include
 include ../scripts/Makefile.arch
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR	?= llvm-ar
-HOSTCC	?= clang
-HOSTLD	?= ld.lld
-else
-HOSTAR	?= ar
-HOSTCC	?= gcc
-HOSTLD	?= ld
-endif
 AR	 = $(HOSTAR)
 CC	 = $(HOSTCC)
 LD	 = $(HOSTLD)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 7ce3f2e8b9c7..62f3deb1d3a8 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -175,10 +175,6 @@ endef
 
 LD += $(EXTRA_LDFLAGS)
 
-HOSTCC  ?= gcc
-HOSTLD  ?= ld
-HOSTAR  ?= ar
-
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index 54a2857c2510..331f6d30f472 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -54,7 +54,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
 LD = $(CC)
-HOSTCC = gcc
 
 # check if compiler option is supported
 cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a7974638561c..1358e89cdf7d 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -59,6 +59,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
 
+ifneq ($(LLVM),)
+HOSTAR  ?= llvm-ar
+HOSTCC  ?= clang
+HOSTLD  ?= ld.lld
+else
+HOSTAR  ?= ar
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+endif
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
-- 
cgit v1.2.3


From 9e8929fdbb9c9026bd3a732e9ac7dc9617c86309 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:06 +0100
Subject: tools/bpftool: Force clean of out-of-tree build

Cleaning a partial build can fail if the output directory for libbpf
wasn't created:

$ make -C tools/bpf/bpftool O=/tmp/bpf clean
/bin/sh: line 0: cd: /tmp/bpf/libbpf/: No such file or directory
tools/scripts/Makefile.include:17: *** output directory "/tmp/bpf/libbpf/" does not exist.  Stop.
make: *** [Makefile:36: /tmp/bpf/libbpf/libbpf.a-clean] Error 2

As a result make never gets around to clearing the leftover objects. Add
the libbpf output directory as clean dependency to ensure clean always
succeeds (similarly to the "descend" macro). The directory is later
removed by the clean recipe.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-3-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f60e6ad3a1df..1358c093b812 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -27,11 +27,13 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a
 
 BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
-$(LIBBPF): FORCE
-	$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
+$(LIBBPF_OUTPUT):
+	$(QUIET_MKDIR)mkdir -p $@
+
+$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
 
-$(LIBBPF)-clean:
+$(LIBBPF)-clean: $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
-- 
cgit v1.2.3


From 8859b0da5aac28e4e9651c8971e7af344f8ffec1 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:07 +0100
Subject: tools/bpftool: Fix cross-build

The bpftool build first creates an intermediate binary, executed on the
host, to generate skeletons required by the final build. When
cross-building bpftool for an architecture different from the host, the
intermediate binary should be built using the host compiler (gcc) and
the final bpftool using the cross compiler (e.g. aarch64-linux-gnu-gcc).

Generate the intermediate objects into the bootstrap/ directory using
the host toolchain.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-4-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 1358c093b812..d566bced135e 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -19,24 +19,37 @@ BPF_DIR = $(srctree)/tools/lib/bpf/
 ifneq ($(OUTPUT),)
   LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
   LIBBPF_PATH = $(LIBBPF_OUTPUT)
+  BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/
 else
+  LIBBPF_OUTPUT =
   LIBBPF_PATH = $(BPF_DIR)
+  BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/
 endif
 
 LIBBPF = $(LIBBPF_PATH)libbpf.a
+LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/
+LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
 
 BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
-$(LIBBPF_OUTPUT):
+$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
 
 $(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
 
+$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
+		ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@
+
 $(LIBBPF)-clean: $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
+$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT)
+	$(call QUIET_CLEAN, libbpf-bootstrap)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null
+
 prefix ?= /usr/local
 bash_compdir ?= /usr/share/bash-completion/completions
 
@@ -94,6 +107,7 @@ CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
 endif
 
 LIBS = $(LIBBPF) -lelf -lz
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
 ifeq ($(feature-libcap), 1)
 CFLAGS += -DUSE_LIBCAP
 LIBS += -lcap
@@ -120,9 +134,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
 SRCS += $(BFD_SRCS)
 endif
 
-BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
@@ -169,12 +183,16 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 
 $(OUTPUT)feature.o: | zdep
 
-$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
+	$(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \
+		$(LIBS_BOOTSTRAP)
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
 
+$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT)
+	$(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
@@ -182,11 +200,11 @@ feature-detect-clean:
 	$(call QUIET_CLEAN, feature-detect)
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
-clean: $(LIBBPF)-clean feature-detect-clean
+clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
-	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
+	$(Q)$(RM) -- $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
+	$(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT)
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
 	$(Q)$(RM) -r -- $(OUTPUT)feature/
-- 
cgit v1.2.3


From 3290996e713315dfc9589e2c340a4c4997d90be8 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:08 +0100
Subject: tools/runqslower: Use Makefile.include

Makefile.include defines variables such as OUTPUT and CC for out-of-tree
build and cross-build. Include it into the runqslower Makefile and use
its $(QUIET*) helpers.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-5-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index fb1337d69868..bcc4a7396713 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+include ../../scripts/Makefile.include
+
 OUTPUT := .output
 CLANG ?= clang
 LLC ?= llc
@@ -21,10 +23,8 @@ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 abs_out := $(abspath $(OUTPUT))
 ifeq ($(V),1)
 Q =
-msg =
 else
 Q = @
-msg = @printf '  %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
 MAKEFLAGS += --no-print-directory
 submake_extras := feature_display=0
 endif
@@ -37,12 +37,11 @@ all: runqslower
 runqslower: $(OUTPUT)/runqslower
 
 clean:
-	$(call msg,CLEAN)
+	$(call QUIET_CLEAN, runqslower)
 	$(Q)rm -rf $(OUTPUT) runqslower
 
 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
-	$(call msg,BINARY,$@)
-	$(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
+	$(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
 
 $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h	      \
 			$(OUTPUT)/runqslower.bpf.o
@@ -50,31 +49,26 @@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h	      \
 $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h
 
 $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
-	$(call msg,GEN-SKEL,$@)
-	$(Q)$(BPFTOOL) gen skeleton $< > $@
+	$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
 
 $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
-	$(call msg,BPF,$@)
-	$(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES)			      \
+	$(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES)		      \
 		 -c $(filter %.c,$^) -o $@ &&				      \
 	$(LLVM_STRIP) -g $@
 
 $(OUTPUT)/%.o: %.c | $(OUTPUT)
-	$(call msg,CC,$@)
-	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
 
 $(OUTPUT):
-	$(call msg,MKDIR,$@)
-	$(Q)mkdir -p $(OUTPUT)
+	$(QUIET_MKDIR)mkdir -p $(OUTPUT)
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
-	$(call msg,GEN,$@)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
 		echo "Couldn't find kernel BTF; set VMLINUX_BTF to"	       \
 			"specify its location." >&2;			       \
 		exit 1;\
 	fi
-	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)			       \
-- 
cgit v1.2.3


From 85e59344d0790379e063bf3cd3dd0fe91ce3b505 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:09 +0100
Subject: tools/runqslower: Enable out-of-tree build

Enable out-of-tree build for runqslower. Only set OUTPUT=.output if it
wasn't already set by the user.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-6-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index bcc4a7396713..0fc4d4046193 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,15 +1,18 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 include ../../scripts/Makefile.include
 
-OUTPUT := .output
+OUTPUT ?= $(abspath .output)/
+
 CLANG ?= clang
 LLC ?= llc
 LLVM_STRIP ?= llvm-strip
-DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 LIBBPF_SRC := $(abspath ../../lib/bpf)
-BPFOBJ := $(OUTPUT)/libbpf.a
-BPF_INCLUDE := $(OUTPUT)
+BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
+BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
+BPF_INCLUDE := $(BPFOBJ_OUTPUT)
 INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)        \
        -I$(abspath ../../include/uapi)
 CFLAGS := -g -Wall
@@ -20,7 +23,6 @@ VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 					  $(wildcard $(VMLINUX_BTF_PATHS))))
 
-abs_out := $(abspath $(OUTPUT))
 ifeq ($(V),1)
 Q =
 else
@@ -38,7 +40,11 @@ runqslower: $(OUTPUT)/runqslower
 
 clean:
 	$(call QUIET_CLEAN, runqslower)
-	$(Q)rm -rf $(OUTPUT) runqslower
+	$(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
+	$(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
+	$(Q)$(RM) $(OUTPUT)runqslower
+	$(Q)$(RM) -r .output
 
 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
@@ -59,8 +65,8 @@ $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
 $(OUTPUT)/%.o: %.c | $(OUTPUT)
 	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
 
-$(OUTPUT):
-	$(QUIET_MKDIR)mkdir -p $(OUTPUT)
+$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
+	$(QUIET_MKDIR)mkdir -p $@
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
@@ -70,10 +76,8 @@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
 	fi
 	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
 
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)			       \
-		    OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
 
-$(DEFAULT_BPFTOOL):
-	$(Q)$(MAKE) $(submake_extras) -C ../bpftool			      \
-		    prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)
-- 
cgit v1.2.3


From 2d9393fefb506fb8c8a483e5dc9bbd7774657b89 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:10 +0100
Subject: tools/runqslower: Build bpftool using HOSTCC

When cross building runqslower for an other architecture, the
intermediate bpftool used to generate a skeleton must be built using the
host toolchain. Pass HOSTCC and HOSTLD, defined in Makefile.include, to
the bpftool Makefile.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-7-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 0fc4d4046193..4d5ca54fcd4c 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -80,4 +80,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
 
 $(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)   \
+		    CC=$(HOSTCC) LD=$(HOSTLD)
-- 
cgit v1.2.3


From 0639e5e97ad9c58dd15dcf6f6ccf677cfba39f98 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:11 +0100
Subject: tools/bpftool: Fix build slowdown

Commit ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to
make") changed BPFTOOL_VERSION to a recursively expanded variable,
forcing it to be recomputed on every expansion of CFLAGS and
dramatically slowing down the bpftool build. Restore BPFTOOL_VERSION as
a simply expanded variable, guarded by an ifeq().

Fixes: ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to make")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-8-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index d566bced135e..804ade95929f 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -30,7 +30,9 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a
 LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/
 LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
 
-BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+ifeq ($(BPFTOOL_VERSION),)
+BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+endif
 
 $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
-- 
cgit v1.2.3


From ef1220a7d4bbdb5fc435d691776778568dfb69a8 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 10 Nov 2020 10:00:48 +0800
Subject: selftests: pmtu.sh: use $ksft_skip for skipped return code

This test uses return code 2 as a hard-coded skipped state, let's use
the kselftest framework skip code variable $ksft_skip instead to make
it more readable and easier to maintain.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 64 ++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 6bbf69a28e12..fb53987ab64b 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -355,7 +355,7 @@ setup_fou_or_gue() {
 	encap="${3}"
 
 	if [ "${outer}" = "4" ]; then
-		modprobe fou || return 2
+		modprobe fou || return $ksft_skip
 		a_addr="${prefix4}.${a_r1}.1"
 		b_addr="${prefix4}.${b_r1}.1"
 		if [ "${inner}" = "4" ]; then
@@ -366,7 +366,7 @@ setup_fou_or_gue() {
 			ipproto="41"
 		fi
 	else
-		modprobe fou6 || return 2
+		modprobe fou6 || return $ksft_skip
 		a_addr="${prefix6}:${a_r1}::1"
 		b_addr="${prefix6}:${b_r1}::1"
 		if [ "${inner}" = "4" ]; then
@@ -380,8 +380,8 @@ setup_fou_or_gue() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
-	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
+	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip
 
 	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
 	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
@@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
 	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
 
 	run_cmd ${ns_a} ip link set ip_a up
@@ -713,7 +713,7 @@ setup_routing() {
 }
 
 setup_bridge() {
-	run_cmd ${ns_a} ip link add br0 type bridge || return 2
+	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
 	run_cmd ${ns_a} ip link set br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -765,7 +765,7 @@ setup_ovs_vxlan6() {
 }
 
 setup_ovs_bridge() {
-	run_cmd ovs-vsctl add-br ovs_br0 || return 2
+	run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
 	run_cmd ip link set ovs_br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -887,7 +887,7 @@ check_pmtu_value() {
 test_pmtu_ipvX() {
 	family=${1}
 
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ${type}4 || return 2
+		setup namespaces routing ${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ${type}6 || return 2
+		setup namespaces routing ${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing bridge bridged_${type}4 || return 2
+		setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing bridge bridged_${type}6 || return 2
+		setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
 	encap=${3}
 	ll_mtu=4000
 
-	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+	setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() {
 	outer=${2}
 	ll_mtu=4000
 
-	setup namespaces routing ip${inner}ip${outer} || return 2
+	setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
 
 	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
@@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() {
 }
 
 test_pmtu_vti4_exception() {
-	setup namespaces veth vti4 xfrm4 || return 2
+	setup namespaces veth vti4 xfrm4 || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
@@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() {
 }
 
 test_pmtu_vti6_exception() {
-	setup namespaces veth vti6 xfrm6 || return 2
+	setup namespaces veth vti6 xfrm6 || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
@@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() {
 }
 
 test_pmtu_vti4_default_mtu() {
-	setup namespaces veth vti4 || return 2
+	setup namespaces veth vti4 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv4 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() {
 }
 
 test_pmtu_vti6_default_mtu() {
-	setup namespaces veth vti6 || return 2
+	setup namespaces veth vti6 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv6 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() {
 }
 
 test_pmtu_vti4_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti not supported" && return 2
+	[ $? -ne 0 ] && err "  vti not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti4_a
 
 	fail=0
@@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
+	[ $? -ne 0 ] && err "  vti6 not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti6_a
 
 	fail=0
@@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_change_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
-	[ $? -ne 0 ] && err "  dummy not supported" && return 2
+	[ $? -ne 0 ] && err "  dummy not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
 	run_cmd ${ns_a} ip link set dummy0 up
 	run_cmd ${ns_a} ip link set dummy1 up
@@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() {
 	encap="vxlan"
 	ll_mtu=4000
 
-	check_command taskset || return 2
+	check_command taskset || return $ksft_skip
 	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
 
-	setup namespaces routing ${encap}${outer} || return 2
+	setup namespaces routing ${encap}${outer} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1644,7 +1644,7 @@ run_test() {
 		fi
 		err_flush
 		exit 1
-	elif [ $ret -eq 2 ]; then
+	elif [ $ret -eq $ksft_skip ]; then
 		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
 		err_flush
 	fi
@@ -1667,7 +1667,7 @@ run_test_nh() {
 }
 
 test_list_flush_ipv4_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1721,7 +1721,7 @@ test_list_flush_ipv4_exception() {
 }
 
 test_list_flush_ipv6_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1840,7 +1840,7 @@ for t in ${tests}; do
 	if [ $run_this -eq 1 ]; then
 		run_test "${name}" "${desc}"
 		# if test was skipped no need to retry with nexthop objects
-		[ $? -eq 2 ] && rerun_nh=0
+		[ $? -eq $ksft_skip ] && rerun_nh=0
 
 		if [ "${rerun_nh}" = "1" ]; then
 			run_test_nh "${name}" "${desc}"
-- 
cgit v1.2.3


From 2a9d3716b810a4f2c8291b7aa8f358d11693f6e5 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 10 Nov 2020 10:00:49 +0800
Subject: selftests: pmtu.sh: improve the test result processing

This test will treat all non-zero return codes as failures, it will
make the pmtu.sh test script being marked as FAILED when some
sub-test got skipped.

Improve the result processing by
  * Only mark the whole test script as SKIP when all of the
    sub-tests were skipped
  * If the sub-tests were either passed or skipped, the overall
    result will be PASS
  * If any of them has failed with return code 1 or anything bad
    happened (e.g. return code 127 for command not found), the
    overall result will be FAIL

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index fb53987ab64b..464e31eabc73 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1652,7 +1652,19 @@ run_test() {
 	return $ret
 	)
 	ret=$?
-	[ $ret -ne 0 ] && exitcode=1
+	case $ret in
+		0)
+			all_skipped=false
+			[ $exitcode=$ksft_skip ] && exitcode=0
+		;;
+		$ksft_skip)
+			[ $all_skipped = true ] && exitcode=$ksft_skip
+		;;
+		*)
+			all_skipped=false
+			exitcode=1
+		;;
+	esac
 
 	return $ret
 }
@@ -1786,6 +1798,7 @@ usage() {
 #
 exitcode=0
 desc=0
+all_skipped=true
 
 while getopts :ptv o
 do
-- 
cgit v1.2.3


From 6a59edd832e2e353809778859d2a4a2d6c94d011 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 12 Nov 2020 10:10:50 +0100
Subject: tools/bpf: Add bootstrap/ to .gitignore

Commit 8859b0da5aac ("tools/bpftool: Fix cross-build") added a
build-time bootstrap/ directory for bpftool, and removed
bpftool-bootstrap. Update .gitignore accordingly.

Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201112091049.3159055-1-jean-philippe@linaro.org
---
 tools/bpf/bpftool/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 3e601bcfd461..944cb4b7c95d 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.d
-/bpftool-bootstrap
+/bootstrap/
 /bpftool
 bpftool*.8
 bpf-helpers.*
-- 
cgit v1.2.3


From c36538798fc6c80bd8bdaddad803b0c86dc13d7c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 12 Nov 2020 10:10:52 +0100
Subject: tools/bpf: Always run the *-clean recipes

Make $(LIBBPF)-clean and $(LIBBPF_BOOTSTRAP)-clean .PHONY targets, in
case those files exist. And keep consistency within the Makefile by
making the directory dependencies order-only.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201112091049.3159055-2-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 804ade95929f..f897cb5fb12d 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -44,11 +44,11 @@ $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
 		ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@
 
-$(LIBBPF)-clean: $(LIBBPF_OUTPUT)
+$(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
-$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT)
+$(LIBBPF_BOOTSTRAP)-clean: FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
 	$(call QUIET_CLEAN, libbpf-bootstrap)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null
 
-- 
cgit v1.2.3


From e865802357086b36632acf3e629f726f089a6769 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 11 Nov 2020 16:05:35 +0100
Subject: selftests: set conf.all.rp_filter=0 in bareudp.sh

When working on the rp_filter problem, I didn't realise that disabling
it on the network devices didn't cover all cases: rp_filter could also
be enabled globally in the namespace, in which case it would drop
packets, even if the net device has rp_filter=0.

Fixes: 1ccd58331f6f ("selftests: disable rp_filter when testing bareudp")
Fixes: bbbc7aa45eef ("selftests: add test script for bareudp tunnels")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/f2d459346471f163b239aa9d63ce3e2ba9c62895.1605107012.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/bareudp.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index c2b9e990e544..f366cadbc5e8 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -238,6 +238,8 @@ setup_overlay_ipv4()
 	# The intermediate namespaces don't have routes for the reverse path,
 	# as it will be handled by tc. So we need to ensure that rp_filter is
 	# not going to block the traffic.
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
 	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
-- 
cgit v1.2.3


From 9cc873e85800ccde80aa2e4b2bae9f1b5fa4c478 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 10 Nov 2020 19:12:12 -0800
Subject: selftests/bpf: Add skb_pkt_end test

Add a test that currently makes LLVM generate assembly code:

$ llvm-objdump -S skb_pkt_end.o
0000000000000000 <main_prog>:
; 	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
       0:	61 12 50 00 00 00 00 00	r2 = *(u32 *)(r1 + 80)
       1:	61 14 4c 00 00 00 00 00	r4 = *(u32 *)(r1 + 76)
       2:	bf 43 00 00 00 00 00 00	r3 = r4
       3:	07 03 00 00 36 00 00 00	r3 += 54
       4:	b7 01 00 00 00 00 00 00	r1 = 0
       5:	2d 23 02 00 00 00 00 00	if r3 > r2 goto +2 <LBB0_2>
       6:	07 04 00 00 0e 00 00 00	r4 += 14
; 	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
       7:	bf 41 00 00 00 00 00 00	r1 = r4
0000000000000040 <LBB0_2>:
       8:	b4 00 00 00 ff ff ff ff	w0 = -1
; 	if (!(ip = get_iphdr(skb)))
       9:	2d 23 05 00 00 00 00 00	if r3 > r2 goto +5 <LBB0_6>
; 	proto = ip->protocol;
      10:	71 12 09 00 00 00 00 00	r2 = *(u8 *)(r1 + 9)
; 	if (proto != IPPROTO_TCP)
      11:	56 02 03 00 06 00 00 00	if w2 != 6 goto +3 <LBB0_6>
; 	if (tcp->dest != 0)
      12:	69 12 16 00 00 00 00 00	r2 = *(u16 *)(r1 + 22)
      13:	56 02 01 00 00 00 00 00	if w2 != 0 goto +1 <LBB0_6>
; 	return tcp->urg_ptr;
      14:	69 10 26 00 00 00 00 00	r0 = *(u16 *)(r1 + 38)
0000000000000078 <LBB0_6>:
; }
      15:	95 00 00 00 00 00 00 00	exit

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201111031213.25109-3-alexei.starovoitov@gmail.com
---
 .../selftests/bpf/prog_tests/test_skb_pkt_end.c    | 41 ++++++++++++++++
 tools/testing/selftests/bpf/progs/skb_pkt_end.c    | 54 ++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
 create mode 100644 tools/testing/selftests/bpf/progs/skb_pkt_end.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
new file mode 100644
index 000000000000..cf1215531920
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_pkt_end.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	if (CHECK(err || retval != 123, "test_run",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, retval, duration))
+		return -1;
+	return 0;
+}
+
+void test_test_skb_pkt_end(void)
+{
+	struct skb_pkt_end *skb_pkt_end_skel = NULL;
+	__u32 duration = 0;
+	int err;
+
+	skb_pkt_end_skel = skb_pkt_end__open_and_load();
+	if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n"))
+		goto cleanup;
+
+	err = skb_pkt_end__attach(skb_pkt_end_skel);
+	if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(skb_pkt_end_skel->progs.main_prog))
+		goto cleanup;
+
+cleanup:
+	skb_pkt_end__destroy(skb_pkt_end_skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
new file mode 100644
index 000000000000..cf6823f42e80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_PRESERVE_ACCESS_INDEX
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#define NULL 0
+#define INLINE __always_inline
+
+#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
+
+#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr))
+
+static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct ethhdr *eth;
+
+	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
+		goto out;
+
+	eth = (void *)(long)skb->data;
+	ip = (void *)(eth + 1);
+
+out:
+	return ip;
+}
+
+SEC("classifier/cls")
+int main_prog(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct tcphdr *tcp;
+	__u8 proto = 0;
+
+	if (!(ip = get_iphdr(skb)))
+		goto out;
+
+	proto = ip->protocol;
+
+	if (proto != IPPROTO_TCP)
+		goto out;
+
+	tcp = (void*)(ip + 1);
+	if (tcp->dest != 0)
+		goto out;
+	if (!tcp)
+		goto out;
+
+	return tcp->urg_ptr;
+out:
+	return -1;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From cb62d34019d9117bb94de6ed35959449d43d6055 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 10 Nov 2020 19:12:13 -0800
Subject: selftests/bpf: Add asm tests for pkt vs pkt_end comparison.

Add few assembly tests for packet comparison.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201111031213.25109-4-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/verifier/ctx_skb.c | 42 ++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2e16b8e268f2..2022c0f2cd75 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1089,3 +1089,45 @@
 	.errstr_unpriv = "R1 leaks addr",
 	.result = REJECT,
 },
+{
+       "pkt > pkt_end taken check",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,                //  0. r2 = *(u32 *)(r1 + data_end)
+                   offsetof(struct __sk_buff, data_end)),
+       BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,                //  1. r4 = *(u32 *)(r1 + data)
+                   offsetof(struct __sk_buff, data)),
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),                    //  2. r3 = r4
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),                  //  3. r3 += 42
+       BPF_MOV64_IMM(BPF_REG_1, 0),                            //  4. r1 = 0
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),          //  5. if r3 > r2 goto 8
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),                  //  6. r4 += 14
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),                    //  7. r1 = r4
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),          //  8. if r3 > r2 goto 10
+       BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),            //  9. r2 = *(u8 *)(r1 + 9)
+       BPF_MOV64_IMM(BPF_REG_0, 0),                            // 10. r0 = 0
+       BPF_EXIT_INSN(),                                        // 11. exit
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_SKB,
+},
+{
+       "pkt_end < pkt taken check",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,                //  0. r2 = *(u32 *)(r1 + data_end)
+                   offsetof(struct __sk_buff, data_end)),
+       BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,                //  1. r4 = *(u32 *)(r1 + data)
+                   offsetof(struct __sk_buff, data)),
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),                    //  2. r3 = r4
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),                  //  3. r3 += 42
+       BPF_MOV64_IMM(BPF_REG_1, 0),                            //  4. r1 = 0
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),          //  5. if r3 > r2 goto 8
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),                  //  6. r4 += 14
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),                    //  7. r1 = r4
+       BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1),          //  8. if r2 < r3 goto 10
+       BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),            //  9. r2 = *(u8 *)(r1 + 9)
+       BPF_MOV64_IMM(BPF_REG_0, 0),                            // 10. r0 = 0
+       BPF_EXIT_INSN(),                                        // 11. exit
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_SKB,
+},
-- 
cgit v1.2.3


From 53632e11194663b7d5b043a68648892e593dc102 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 12 Nov 2020 13:13:20 -0800
Subject: bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP

This patch tests storing the task's related info into the
bpf_sk_storage by fentry/fexit tracing at listen, accept,
and connect.  It also tests the raw_tp at inet_sock_set_state.

A negative test is done by tracing the bpf_sk_storage_free()
and using bpf_sk_storage_get() at the same time.  It ensures
this bpf program cannot load.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201112211320.2587537-1-kafai@fb.com
---
 .../selftests/bpf/prog_tests/sk_storage_tracing.c  | 135 +++++++++++++++++++++
 .../bpf/progs/test_sk_storage_trace_itself.c       |  29 +++++
 .../selftests/bpf/progs/test_sk_storage_tracing.c  |  95 +++++++++++++++
 3 files changed, 259 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
new file mode 100644
index 000000000000..2b392590e8ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_sk_storage_trace_itself.skel.h"
+#include "test_sk_storage_tracing.skel.h"
+
+#define LO_ADDR6 "::1"
+#define TEST_COMM "test_progs"
+
+struct sk_stg {
+	__u32 pid;
+	__u32 last_notclose_state;
+	char comm[16];
+};
+
+static struct test_sk_storage_tracing *skel;
+static __u32 duration;
+static pid_t my_pid;
+
+static int check_sk_stg(int sk_fd, __u32 expected_state)
+{
+	struct sk_stg sk_stg;
+	int err;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd,
+				  &sk_stg);
+	if (!ASSERT_OK(err, "map_lookup(sk_stg_map)"))
+		return -1;
+
+	if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state,
+		       "last_notclose_state"))
+		return -1;
+
+	if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid"))
+		return -1;
+
+	if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm"))
+		return -1;
+
+	return 0;
+}
+
+static void do_test(void)
+{
+	int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err;
+	char abyte;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(listen_fd == -1, "start_server",
+		  "listen_fd:%d errno:%d\n", listen_fd, errno))
+		return;
+
+	active_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n",
+		  active_fd, errno))
+		goto out;
+
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+				  &active_fd, &value, 0);
+	if (!ASSERT_OK(err, "map_update(del_sk_stg_map)"))
+		goto out;
+
+	passive_fd = accept(listen_fd, NULL, 0);
+	if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n",
+		  passive_fd, errno))
+		goto out;
+
+	shutdown(active_fd, SHUT_WR);
+	err = read(passive_fd, &abyte, 1);
+	if (!ASSERT_OK(err, "read(passive_fd)"))
+		goto out;
+
+	shutdown(passive_fd, SHUT_WR);
+	err = read(active_fd, &abyte, 1);
+	if (!ASSERT_OK(err, "read(active_fd)"))
+		goto out;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+				  &active_fd, &value);
+	if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)"))
+		goto out;
+
+	err = check_sk_stg(listen_fd, BPF_TCP_LISTEN);
+	if (!ASSERT_OK(err, "listen_fd sk_stg"))
+		goto out;
+
+	err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2);
+	if (!ASSERT_OK(err, "active_fd sk_stg"))
+		goto out;
+
+	err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK);
+	ASSERT_OK(err, "passive_fd sk_stg");
+
+out:
+	if (active_fd != -1)
+		close(active_fd);
+	if (passive_fd != -1)
+		close(passive_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+}
+
+void test_sk_storage_tracing(void)
+{
+	struct test_sk_storage_trace_itself *skel_itself;
+	int err;
+
+	my_pid = getpid();
+
+	skel_itself = test_sk_storage_trace_itself__open_and_load();
+
+	if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) {
+		test_sk_storage_trace_itself__destroy(skel_itself);
+		return;
+	}
+
+	skel = test_sk_storage_tracing__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing"))
+		return;
+
+	err = test_sk_storage_tracing__attach(skel);
+	if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) {
+		test_sk_storage_tracing__destroy(skel);
+		return;
+	}
+
+	do_test();
+
+	test_sk_storage_tracing__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
new file mode 100644
index 000000000000..59ef72d02a61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("fentry/bpf_sk_storage_free")
+int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk)
+{
+	int *value;
+
+	value = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				   BPF_SK_STORAGE_GET_F_CREATE);
+
+	if (value)
+		*value = 1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
new file mode 100644
index 000000000000..8e94e5c080aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+struct sk_stg {
+	__u32 pid;
+	__u32 last_notclose_state;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+/* Testing delete */
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} del_sk_stg_map SEC(".maps");
+
+char task_comm[16] = "";
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate,
+	     int newstate)
+{
+	struct sk_stg *stg;
+
+	if (newstate == BPF_TCP_CLOSE)
+		return 0;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return 0;
+
+	stg->last_notclose_state = newstate;
+
+	bpf_sk_storage_delete(&del_sk_stg_map, sk);
+
+	return 0;
+}
+
+static void set_task_info(struct sock *sk)
+{
+	struct task_struct *task;
+	struct sk_stg *stg;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return;
+
+	stg->pid = bpf_get_current_pid_tgid();
+
+	task = (struct task_struct *)bpf_get_current_task();
+	bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm);
+	bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm);
+}
+
+SEC("fentry/inet_csk_listen_start")
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fentry/tcp_connect")
+int BPF_PROG(trace_tcp_connect, struct sock *sk)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fexit/inet_csk_accept")
+int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+	     struct sock *accepted_sk)
+{
+	set_task_info(accepted_sk);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 024cd2cbd1ca2d29e6df538855d52c4e5990cab7 Mon Sep 17 00:00:00 2001
From: Santucci Pierpaolo <santucci@epigenesys.com>
Date: Mon, 16 Nov 2020 11:30:37 +0100
Subject: selftest/bpf: Fix IPV6FR handling in flow dissector

From second fragment on, IPV6FR program must stop the dissection of IPV6
fragmented packet. This is the same approach used for IPV4 fragmentation.
This fixes the flow keys calculation for the upper-layer protocols.
Note that according to RFC8200, the first fragment packet must include
the upper-layer header.

Signed-off-by: Santucci Pierpaolo <santucci@epigenesys.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/X7JUzUj34ceE2wBm@santucci.pierpaolo
---
 tools/testing/selftests/bpf/progs/bpf_flow.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 5a65f6b51377..95a5a0778ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -368,6 +368,8 @@ PROG(IPV6FR)(struct __sk_buff *skb)
 		 */
 		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
 			return export_flow_keys(keys, BPF_OK);
+	} else {
+		return export_flow_keys(keys, BPF_OK);
 	}
 
 	return parse_ipv6_proto(skb, fragh->nexthdr);
-- 
cgit v1.2.3


From 8113ab20e850491b4144a1a64246f07a2d737a49 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 12:28:58 +0200
Subject: tools/power/cpupower: Read energy_perf_bias from sysfs

... instead of poking at the MSR. For that, move the accessor functions
to misc.c and add a sysfs-writing function too.

There should be no functional changes resulting from this.

Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Thomas Renninger <trenn@suse.com>
Link: https://lkml.kernel.org/r/20201029190259.3476-2-bp@alien8.de
---
 tools/power/cpupower/lib/cpupower.c          | 23 ++++++++++++-
 tools/power/cpupower/lib/cpupower_intern.h   |  5 +++
 tools/power/cpupower/utils/cpupower-info.c   |  2 +-
 tools/power/cpupower/utils/cpupower-set.c    |  2 +-
 tools/power/cpupower/utils/helpers/helpers.h |  8 ++---
 tools/power/cpupower/utils/helpers/misc.c    | 48 ++++++++++++++++++++++++++++
 tools/power/cpupower/utils/helpers/msr.c     | 28 ----------------
 7 files changed, 81 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 3656e697537e..3f7d0c0c5067 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -16,8 +16,8 @@
 
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 {
-	int fd;
 	ssize_t numread;
+	int fd;
 
 	fd = open(path, O_RDONLY);
 	if (fd == -1)
@@ -35,6 +35,27 @@ unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 	return (unsigned int) numread;
 }
 
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numwritten;
+	int fd;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwritten = write(fd, buf, buflen - 1);
+	if (numwritten < 1) {
+		perror(path);
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwritten;
+}
+
 /*
  * Detect whether a CPU is online
  *
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
index 4887c76d23f8..ac1112b956ec 100644
--- a/tools/power/cpupower/lib/cpupower_intern.h
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #define PATH_TO_CPU "/sys/devices/system/cpu/"
+
+#ifndef MAX_LINE_LEN
 #define MAX_LINE_LEN 4096
+#endif
+
 #define SYSFS_PATH_MAX 255
 
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index 0ba61a2c4d81..06345b543786 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -101,7 +101,7 @@ int cmd_info(int argc, char **argv)
 		}
 
 		if (params.perf_bias) {
-			ret = msr_intel_get_perf_bias(cpu);
+			ret = cpupower_intel_get_perf_bias(cpu);
 			if (ret < 0) {
 				fprintf(stderr,
 			_("Could not read perf-bias value[%d]\n"), ret);
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 052044d7e012..180d5ba877e6 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -95,7 +95,7 @@ int cmd_set(int argc, char **argv)
 		}
 
 		if (params.perf_bias) {
-			ret = msr_intel_set_perf_bias(cpu, perf_bias);
+			ret = cpupower_intel_set_perf_bias(cpu, perf_bias);
 			if (ret) {
 				fprintf(stderr, _("Error setting perf-bias "
 						  "value on CPU %d\n"), cpu);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index c258eeccd05f..37dac161f3fe 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -105,8 +105,8 @@ extern struct cpupower_cpu_info cpupower_cpu_info;
 extern int read_msr(int cpu, unsigned int idx, unsigned long long *val);
 extern int write_msr(int cpu, unsigned int idx, unsigned long long val);
 
-extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val);
-extern int msr_intel_get_perf_bias(unsigned int cpu);
+extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val);
+extern int cpupower_intel_get_perf_bias(unsigned int cpu);
 extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
 
 /* Read/Write msr ****************************/
@@ -150,9 +150,9 @@ static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val)
 { return -1; };
 static inline int write_msr(int cpu, unsigned int idx, unsigned long long val)
 { return -1; };
-static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+static inline int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
 { return -1; };
-static inline int msr_intel_get_perf_bias(unsigned int cpu)
+static inline int cpupower_intel_get_perf_bias(unsigned int cpu)
 { return -1; };
 static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 { return 0; };
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index f406adc40bad..e8f8f643a627 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -1,7 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #include "helpers/helpers.h"
+#include "helpers/sysfs.h"
+
+#include "cpupower_intern.h"
 
 #define MSR_AMD_HWCR	0xc0010015
 
@@ -40,4 +48,44 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 		*support = *active = 1;
 	return 0;
 }
+
+int cpupower_intel_get_perf_bias(unsigned int cpu)
+{
+	char linebuf[MAX_LINE_LEN];
+	char path[SYSFS_PATH_MAX];
+	unsigned long val;
+	char *endp;
+
+	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+
+	if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return val;
+}
+
+int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3] = {};
+
+	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+	snprintf(linebuf, sizeof(linebuf), "%d", val);
+
+	if (cpupower_write_sysfs(path, linebuf, 3) <= 0)
+		return -1;
+
+	return 0;
+}
+
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/msr.c b/tools/power/cpupower/utils/helpers/msr.c
index ab9950748838..8b0b6be74bb8 100644
--- a/tools/power/cpupower/utils/helpers/msr.c
+++ b/tools/power/cpupower/utils/helpers/msr.c
@@ -11,7 +11,6 @@
 /* Intel specific MSRs */
 #define MSR_IA32_PERF_STATUS		0x198
 #define MSR_IA32_MISC_ENABLES		0x1a0
-#define MSR_IA32_ENERGY_PERF_BIAS	0x1b0
 #define MSR_NEHALEM_TURBO_RATIO_LIMIT	0x1ad
 
 /*
@@ -73,33 +72,6 @@ int write_msr(int cpu, unsigned int idx, unsigned long long val)
 	return -1;
 }
 
-int msr_intel_get_perf_bias(unsigned int cpu)
-{
-	unsigned long long val;
-	int ret;
-
-	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
-		return -1;
-
-	ret = read_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &val);
-	if (ret)
-		return ret;
-	return val;
-}
-
-int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
-{
-	int ret;
-
-	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
-		return -1;
-
-	ret = write_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, val);
-	if (ret)
-		return ret;
-	return 0;
-}
-
 unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 {
 	unsigned long long val;
-- 
cgit v1.2.3


From 6d6501d912a9a5e1b73d7fbf419b90a8ec11ed7a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 14:50:16 +0200
Subject: tools/power/turbostat: Read energy_perf_bias from sysfs

... instead of poking at the MSR directly.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Len Brown <lenb@kernel.org>
Cc: linux-pm@vger.kernel.org
Link: https://lkml.kernel.org/r/20201029190259.3476-3-bp@alien8.de
---
 tools/power/x86/turbostat/turbostat.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 33b370865d16..0baec7ea1bbd 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1721,6 +1721,25 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 	return 0;
 }
 
+int get_epb(int cpu)
+{
+	char path[128 + PATH_BYTES];
+	int ret, epb = -1;
+	FILE *fp;
+
+	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
+
+	fp = fopen_or_die(path, "r");
+
+	ret = fscanf(fp, "%d", &epb);
+	if (ret != 1)
+		err(1, "%s(%s)", __func__, path);
+
+	fclose(fp);
+
+	return epb;
+}
+
 void get_apic_id(struct thread_data *t)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -3631,9 +3650,8 @@ dump_sysfs_pstate_config(void)
  */
 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
-	unsigned long long msr;
 	char *epb_string;
-	int cpu;
+	int cpu, epb;
 
 	if (!has_epb)
 		return 0;
@@ -3649,10 +3667,11 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 	}
 
-	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
+	epb = get_epb(cpu);
+	if (epb < 0)
 		return 0;
 
-	switch (msr & 0xF) {
+	switch (epb) {
 	case ENERGY_PERF_BIAS_PERFORMANCE:
 		epb_string = "performance";
 		break;
@@ -3666,7 +3685,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		epb_string = "custom";
 		break;
 	}
-	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+	fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
 
 	return 0;
 }
-- 
cgit v1.2.3


From fe0a5788624c8b8f113a35bbe4636e37f9321241 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 14:58:48 +0200
Subject: tools/power/x86_energy_perf_policy: Read energy_perf_bias from sysfs

... and stop poking at the MSR directly.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201029190259.3476-4-bp@alien8.de
---
 .../x86_energy_perf_policy.c                       | 109 +++++++++++++++++++--
 1 file changed, 99 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 3fe1eed900d4..ad6aed1fabf8 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -91,6 +91,9 @@ unsigned int has_hwp_request_pkg;	/* IA32_HWP_REQUEST_PKG */
 
 unsigned int bdx_highest_ratio;
 
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define SYSFS_PATH_MAX 255
+
 /*
  * maintain compatibility with original implementation, but don't document it:
  */
@@ -668,6 +671,48 @@ int put_msr(int cpu, int offset, unsigned long long new_msr)
 	return 0;
 }
 
+static unsigned int read_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numread;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+static unsigned int write_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numwritten;
+	int fd;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwritten = write(fd, buf, buflen - 1);
+	if (numwritten < 1) {
+		perror("write failed\n");
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwritten;
+}
+
 void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
 {
 	if (cpu != -1)
@@ -745,17 +790,61 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
 	put_msr(cpu, msr_offset, msr);
 }
 
+static int get_epb(int cpu)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3];
+	char *endp;
+	long val;
+
+	if (!has_epb)
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+
+	if (!read_sysfs(path, linebuf, 3))
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return (int)val;
+}
+
+static int set_epb(int cpu, int val)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3];
+	char *endp;
+	int ret;
+
+	if (!has_epb)
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+	snprintf(linebuf, sizeof(linebuf), "%d", val);
+
+	ret = write_sysfs(path, linebuf, 3);
+	if (ret <= 0)
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return (int)val;
+}
+
 int print_cpu_msrs(int cpu)
 {
-	unsigned long long msr;
 	struct msr_hwp_request req;
 	struct msr_hwp_cap cap;
+	int epb;
 
-	if (has_epb) {
-		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
-
-		printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
-	}
+	epb = get_epb(cpu);
+	if (epb >= 0)
+		printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);
 
 	if (!has_hwp)
 		return 0;
@@ -1038,15 +1127,15 @@ int enable_hwp_on_cpu(int cpu)
 int update_cpu_msrs(int cpu)
 {
 	unsigned long long msr;
-
+	int epb;
 
 	if (update_epb) {
-		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
-		put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
+		epb = get_epb(cpu);
+		set_epb(cpu, new_epb);
 
 		if (verbose)
 			printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
-				cpu, (unsigned int) msr, (unsigned int) new_epb);
+				cpu, epb, (unsigned int) new_epb);
 	}
 
 	if (update_turbo) {
-- 
cgit v1.2.3


From de91e631bdc7e6411989e1a9ab65501a31527e0b Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Sun, 15 Nov 2020 10:46:35 +0000
Subject: libbpf: bpf__find_by_name[_kind] should use btf__get_nr_types()

When operating on split BTF, btf__find_by_name[_kind] will not
iterate over all types since they use btf->nr_types to show
the number of types to iterate over. For split BTF this is
the number of types _on top of base BTF_, so it will
underestimate the number of types to iterate over, especially
for vmlinux + module BTF, where the latter is much smaller.

Use btf__get_nr_types() instead.

Fixes: ba451366bf44 ("libbpf: Implement basic split BTF support")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1605437195-2175-1-git-send-email-alan.maguire@oracle.com
---
 tools/lib/bpf/btf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d0d064c6d31..8ff46cd30ca1 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -674,12 +674,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 
 __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 {
-	__u32 i;
+	__u32 i, nr_types = btf__get_nr_types(btf);
 
 	if (!strcmp(type_name, "void"))
 		return 0;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i <= nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name = btf__name_by_offset(btf, t->name_off);
 
@@ -693,12 +693,12 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 			     __u32 kind)
 {
-	__u32 i;
+	__u32 i, nr_types = btf__get_nr_types(btf);
 
 	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
 		return 0;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i <= nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name;
 
-- 
cgit v1.2.3


From 3f6719c7b62f0327c9091e26d0da10e65668229e Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 17 Nov 2020 23:29:28 +0000
Subject: bpf: Add bpf_bprm_opts_set helper

The helper allows modification of certain bits on the linux_binprm
struct starting with the secureexec bit which can be updated using the
BPF_F_BPRM_SECUREEXEC flag.

secureexec can be set by the LSM for privilege gaining executions to set
the AT_SECURE auxv for glibc.  When set, the dynamic linker disables the
use of certain environment variables (like LD_PRELOAD).

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117232929.2156341-1-kpsingh@chromium.org
---
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 162999b12790..a52299b80b9d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3787,6 +3787,16 @@ union bpf_attr {
  *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
  *	Return
  *		Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *	Description
+ *		Set or clear certain options on *bprm*:
+ *
+ *		**BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *		which sets the **AT_SECURE** auxv for glibc. The bit
+ *		is cleared if the flag is not specified.
+ *	Return
+ *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3948,6 +3958,7 @@ union bpf_attr {
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
+	FN(bprm_opts_set),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_IP,
 };
 
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
-- 
cgit v1.2.3


From ea87ae85c9b31303a2e9d4c769d9f3ee8a3a60d1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 17 Nov 2020 23:29:29 +0000
Subject: bpf: Add tests for bpf_bprm_opts_set helper

The test forks a child process, updates the local storage to set/unset
the securexec bit.

The BPF program in the test attaches to bprm_creds_for_exec which checks
the local storage of the current task to set the secureexec bit on the
binary parameters (bprm).

The child then execs a bash command with the environment variable
TMPDIR set in the envp.  The bash command returns a different exit code
based on its observed value of the TMPDIR variable.

Since TMPDIR is one of the variables that is ignored by the dynamic
loader when the secureexec bit is set, one should expect the
child execution to not see this value when the secureexec bit is set.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117232929.2156341-2-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_bprm_opts.c      | 116 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bprm_opts.c      |  34 ++++++
 2 files changed, 150 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
 create mode 100644 tools/testing/selftests/bpf/progs/bprm_opts.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
new file mode 100644
index 000000000000..2559bb775762
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "bprm_opts.skel.h"
+#include "network_helpers.h"
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
+static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static int update_storage(int map_fd, int secureexec)
+{
+	int task_fd, ret = 0;
+
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (task_fd < 0)
+		return errno;
+
+	ret = bpf_map_update_elem(map_fd, &task_fd, &secureexec, BPF_NOEXIST);
+	if (ret)
+		ret = errno;
+
+	close(task_fd);
+	return ret;
+}
+
+static int run_set_secureexec(int map_fd, int secureexec)
+{
+	int child_pid, child_status, ret, null_fd;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		null_fd = open("/dev/null", O_WRONLY);
+		if (null_fd == -1)
+			exit(errno);
+		dup2(null_fd, STDOUT_FILENO);
+		dup2(null_fd, STDERR_FILENO);
+		close(null_fd);
+
+		/* Ensure that all executions from hereon are
+		 * secure by setting a local storage which is read by
+		 * the bprm_creds_for_exec hook and sets bprm->secureexec.
+		 */
+		ret = update_storage(map_fd, secureexec);
+		if (ret)
+			exit(ret);
+
+		/* If the binary is executed with securexec=1, the dynamic
+		 * loader ingores and unsets certain variables like LD_PRELOAD,
+		 * TMPDIR etc. TMPDIR is used here to simplify the example, as
+		 * LD_PRELOAD requires a real .so file.
+		 *
+		 * If the value of TMPDIR is set, the bash command returns 10
+		 * and if the value is unset, it returns 20.
+		 */
+		execle("/bin/bash", "bash", "-c",
+		       "[[ -z \"${TMPDIR}\" ]] || exit 10 && exit 20", NULL,
+		       bash_envp);
+		exit(errno);
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		ret = WEXITSTATUS(child_status);
+
+		/* If a secureexec occurred, the exit status should be 20 */
+		if (secureexec && ret == 20)
+			return 0;
+
+		/* If normal execution happened, the exit code should be 10 */
+		if (!secureexec && ret == 10)
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+void test_test_bprm_opts(void)
+{
+	int err, duration = 0;
+	struct bprm_opts *skel = NULL;
+
+	skel = bprm_opts__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+		goto close_prog;
+
+	err = bprm_opts__attach(skel);
+	if (CHECK(err, "attach", "attach failed: %d\n", err))
+		goto close_prog;
+
+	/* Run the test with the secureexec bit unset */
+	err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+				 0 /* secureexec */);
+	if (CHECK(err, "run_set_secureexec:0", "err = %d\n", err))
+		goto close_prog;
+
+	/* Run the test with the secureexec bit set */
+	err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+				 1 /* secureexec */);
+	if (CHECK(err, "run_set_secureexec:1", "err = %d\n", err))
+		goto close_prog;
+
+close_prog:
+	bprm_opts__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c
new file mode 100644
index 000000000000..5bfef2887e70
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bprm_opts.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} secure_exec_task_map SEC(".maps");
+
+SEC("lsm/bprm_creds_for_exec")
+int BPF_PROG(secure_exec, struct linux_binprm *bprm)
+{
+	int *secureexec;
+
+	secureexec = bpf_task_storage_get(&secure_exec_task_map,
+				   bpf_get_current_task_btf(), 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+	if (secureexec && *secureexec)
+		bpf_bprm_opts_set(bprm, BPF_F_BPRM_SECUREEXEC);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 2adcba79e69d4a4c0ac3bb86f466d8b5df301608 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Fri, 13 Nov 2020 00:01:31 +0200
Subject: selftests/x86: Add a selftest for SGX

Add a selftest for SGX. It is a trivial test where a simple enclave
copies one 64-bit word of memory between two memory locations,
but ensures that all SGX hardware and software infrastructure is
functioning.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Cc: linux-kselftest@vger.kernel.org
Link: https://lkml.kernel.org/r/20201112220135.165028-21-jarkko@kernel.org
---
 tools/testing/selftests/Makefile                  |   1 +
 tools/testing/selftests/sgx/.gitignore            |   2 +
 tools/testing/selftests/sgx/Makefile              |  53 +++
 tools/testing/selftests/sgx/call.S                |  44 +++
 tools/testing/selftests/sgx/defines.h             |  21 ++
 tools/testing/selftests/sgx/load.c                | 277 +++++++++++++++
 tools/testing/selftests/sgx/main.c                | 246 ++++++++++++++
 tools/testing/selftests/sgx/main.h                |  38 +++
 tools/testing/selftests/sgx/sigstruct.c           | 391 ++++++++++++++++++++++
 tools/testing/selftests/sgx/test_encl.c           |  20 ++
 tools/testing/selftests/sgx/test_encl.lds         |  40 +++
 tools/testing/selftests/sgx/test_encl_bootstrap.S |  89 +++++
 12 files changed, 1222 insertions(+)
 create mode 100644 tools/testing/selftests/sgx/.gitignore
 create mode 100644 tools/testing/selftests/sgx/Makefile
 create mode 100644 tools/testing/selftests/sgx/call.S
 create mode 100644 tools/testing/selftests/sgx/defines.h
 create mode 100644 tools/testing/selftests/sgx/load.c
 create mode 100644 tools/testing/selftests/sgx/main.c
 create mode 100644 tools/testing/selftests/sgx/main.h
 create mode 100644 tools/testing/selftests/sgx/sigstruct.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.lds
 create mode 100644 tools/testing/selftests/sgx/test_encl_bootstrap.S

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..2e20e30a6faa 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -50,6 +50,7 @@ TARGETS += openat2
 TARGETS += rseq
 TARGETS += rtc
 TARGETS += seccomp
+TARGETS += sgx
 TARGETS += sigaltstack
 TARGETS += size
 TARGETS += sparc64
diff --git a/tools/testing/selftests/sgx/.gitignore b/tools/testing/selftests/sgx/.gitignore
new file mode 100644
index 000000000000..fbaf0bda9a92
--- /dev/null
+++ b/tools/testing/selftests/sgx/.gitignore
@@ -0,0 +1,2 @@
+test_sgx
+test_encl.elf
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
new file mode 100644
index 000000000000..d51c90663943
--- /dev/null
+++ b/tools/testing/selftests/sgx/Makefile
@@ -0,0 +1,53 @@
+top_srcdir = ../../../..
+
+include ../lib.mk
+
+.PHONY: all clean
+
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+			    ../x86/trivial_64bit_program.c)
+
+ifndef OBJCOPY
+OBJCOPY := $(CROSS_COMPILE)objcopy
+endif
+
+INCLUDES := -I$(top_srcdir)/tools/include
+HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack
+ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \
+	       -fno-stack-protector -mrdrnd $(INCLUDES)
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx
+
+ifeq ($(CAN_BUILD_X86_64), 1)
+all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf
+endif
+
+$(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
+		    $(OUTPUT)/load.o \
+		    $(OUTPUT)/sigstruct.o \
+		    $(OUTPUT)/call.o
+	$(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto
+
+$(OUTPUT)/main.o: main.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/load.o: load.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/sigstruct.o: sigstruct.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/call.o: call.S
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
+	$(CC) $(ENCL_CFLAGS) -T $^ -o $@
+
+EXTRA_CLEAN := \
+	$(OUTPUT)/test_encl.elf \
+	$(OUTPUT)/load.o \
+	$(OUTPUT)/call.o \
+	$(OUTPUT)/main.o \
+	$(OUTPUT)/sigstruct.o \
+	$(OUTPUT)/test_sgx \
+	$(OUTPUT)/test_sgx.o \
diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S
new file mode 100644
index 000000000000..4ecadc7490f4
--- /dev/null
+++ b/tools/testing/selftests/sgx/call.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+	.text
+
+	.global sgx_call_vdso
+sgx_call_vdso:
+	.cfi_startproc
+	push	%r15
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r15, 0
+	push	%r14
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r14, 0
+	push	%r13
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r13, 0
+	push	%r12
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r12, 0
+	push	%rbx
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%rbx, 0
+	push	$0
+	.cfi_adjust_cfa_offset	8
+	push	0x38(%rsp)
+	.cfi_adjust_cfa_offset	8
+	call	*eenter(%rip)
+	add	$0x10, %rsp
+	.cfi_adjust_cfa_offset	-0x10
+	pop	%rbx
+	.cfi_adjust_cfa_offset	-8
+	pop	%r12
+	.cfi_adjust_cfa_offset	-8
+	pop	%r13
+	.cfi_adjust_cfa_offset	-8
+	pop	%r14
+	.cfi_adjust_cfa_offset	-8
+	pop	%r15
+	.cfi_adjust_cfa_offset	-8
+	ret
+	.cfi_endproc
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
new file mode 100644
index 000000000000..592c1ccf4576
--- /dev/null
+++ b/tools/testing/selftests/sgx/defines.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef DEFINES_H
+#define DEFINES_H
+
+#include <stdint.h>
+
+#define PAGE_SIZE 4096
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+#define __aligned(x) __attribute__((__aligned__(x)))
+#define __packed __attribute__((packed))
+
+#include "../../../../arch/x86/kernel/cpu/sgx/arch.h"
+#include "../../../../arch/x86/include/asm/enclu.h"
+#include "../../../../arch/x86/include/uapi/asm/sgx.h"
+
+#endif /* DEFINES_H */
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
new file mode 100644
index 000000000000..9d43b75aaa55
--- /dev/null
+++ b/tools/testing/selftests/sgx/load.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <assert.h>
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "defines.h"
+#include "main.h"
+
+void encl_delete(struct encl *encl)
+{
+	if (encl->encl_base)
+		munmap((void *)encl->encl_base, encl->encl_size);
+
+	if (encl->bin)
+		munmap(encl->bin, encl->bin_size);
+
+	if (encl->fd)
+		close(encl->fd);
+
+	if (encl->segment_tbl)
+		free(encl->segment_tbl);
+
+	memset(encl, 0, sizeof(*encl));
+}
+
+static bool encl_map_bin(const char *path, struct encl *encl)
+{
+	struct stat sb;
+	void *bin;
+	int ret;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)  {
+		perror("open()");
+		return false;
+	}
+
+	ret = stat(path, &sb);
+	if (ret) {
+		perror("stat()");
+		goto err;
+	}
+
+	bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (bin == MAP_FAILED) {
+		perror("mmap()");
+		goto err;
+	}
+
+	encl->bin = bin;
+	encl->bin_size = sb.st_size;
+
+	close(fd);
+	return true;
+
+err:
+	close(fd);
+	return false;
+}
+
+static bool encl_ioc_create(struct encl *encl)
+{
+	struct sgx_secs *secs = &encl->secs;
+	struct sgx_enclave_create ioc;
+	int rc;
+
+	assert(encl->encl_base != 0);
+
+	memset(secs, 0, sizeof(*secs));
+	secs->ssa_frame_size = 1;
+	secs->attributes = SGX_ATTR_MODE64BIT;
+	secs->xfrm = 3;
+	secs->base = encl->encl_base;
+	secs->size = encl->encl_size;
+
+	ioc.src = (unsigned long)secs;
+	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
+	if (rc) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n",
+			errno);
+		munmap((void *)secs->base, encl->encl_size);
+		return false;
+	}
+
+	return true;
+}
+
+static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
+{
+	struct sgx_enclave_add_pages ioc;
+	struct sgx_secinfo secinfo;
+	int rc;
+
+	memset(&secinfo, 0, sizeof(secinfo));
+	secinfo.flags = seg->flags;
+
+	ioc.src = (uint64_t)encl->src + seg->offset;
+	ioc.offset = seg->offset;
+	ioc.length = seg->size;
+	ioc.secinfo = (unsigned long)&secinfo;
+	ioc.flags = SGX_PAGE_MEASURE;
+
+	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
+	if (rc < 0) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n",
+			errno);
+		return false;
+	}
+
+	return true;
+}
+
+bool encl_load(const char *path, struct encl *encl)
+{
+	Elf64_Phdr *phdr_tbl;
+	off_t src_offset;
+	Elf64_Ehdr *ehdr;
+	int i, j;
+	int ret;
+
+	memset(encl, 0, sizeof(*encl));
+
+	ret = open("/dev/sgx_enclave", O_RDWR);
+	if (ret < 0) {
+		fprintf(stderr, "Unable to open /dev/sgx_enclave\n");
+		goto err;
+	}
+
+	encl->fd = ret;
+
+	if (!encl_map_bin(path, encl))
+		goto err;
+
+	ehdr = encl->bin;
+	phdr_tbl = encl->bin + ehdr->e_phoff;
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		Elf64_Phdr *phdr = &phdr_tbl[i];
+
+		if (phdr->p_type == PT_LOAD)
+			encl->nr_segments++;
+	}
+
+	encl->segment_tbl = calloc(encl->nr_segments,
+				   sizeof(struct encl_segment));
+	if (!encl->segment_tbl)
+		goto err;
+
+	for (i = 0, j = 0; i < ehdr->e_phnum; i++) {
+		Elf64_Phdr *phdr = &phdr_tbl[i];
+		unsigned int flags = phdr->p_flags;
+		struct encl_segment *seg;
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		seg = &encl->segment_tbl[j];
+
+		if (!!(flags & ~(PF_R | PF_W | PF_X))) {
+			fprintf(stderr,
+				"%d has invalid segment flags 0x%02x.\n", i,
+				phdr->p_flags);
+			goto err;
+		}
+
+		if (j == 0 && flags != (PF_R | PF_W)) {
+			fprintf(stderr,
+				"TCS has invalid segment flags 0x%02x.\n",
+				phdr->p_flags);
+			goto err;
+		}
+
+		if (j == 0) {
+			src_offset = phdr->p_offset & PAGE_MASK;
+
+			seg->prot = PROT_READ | PROT_WRITE;
+			seg->flags = SGX_PAGE_TYPE_TCS << 8;
+		} else  {
+			seg->prot = (phdr->p_flags & PF_R) ? PROT_READ : 0;
+			seg->prot |= (phdr->p_flags & PF_W) ? PROT_WRITE : 0;
+			seg->prot |= (phdr->p_flags & PF_X) ? PROT_EXEC : 0;
+			seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+		}
+
+		seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
+		seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
+
+		printf("0x%016lx 0x%016lx 0x%02x\n", seg->offset, seg->size,
+		       seg->prot);
+
+		j++;
+	}
+
+	assert(j == encl->nr_segments);
+
+	encl->src = encl->bin + src_offset;
+	encl->src_size = encl->segment_tbl[j - 1].offset +
+			 encl->segment_tbl[j - 1].size;
+
+	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
+		encl->encl_size <<= 1;
+
+	return true;
+
+err:
+	encl_delete(encl);
+	return false;
+}
+
+static bool encl_map_area(struct encl *encl)
+{
+	size_t encl_size = encl->encl_size;
+	void *area;
+
+	area = mmap(NULL, encl_size * 2, PROT_NONE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (area == MAP_FAILED) {
+		perror("mmap");
+		return false;
+	}
+
+	encl->encl_base = ((uint64_t)area + encl_size - 1) & ~(encl_size - 1);
+
+	munmap(area, encl->encl_base - (uint64_t)area);
+	munmap((void *)(encl->encl_base + encl_size),
+	       (uint64_t)area + encl_size - encl->encl_base);
+
+	return true;
+}
+
+bool encl_build(struct encl *encl)
+{
+	struct sgx_enclave_init ioc;
+	int ret;
+	int i;
+
+	if (!encl_map_area(encl))
+		return false;
+
+	if (!encl_ioc_create(encl))
+		return false;
+
+	/*
+	 * Pages must be added before mapping VMAs because their permissions
+	 * cap the VMA permissions.
+	 */
+	for (i = 0; i < encl->nr_segments; i++) {
+		struct encl_segment *seg = &encl->segment_tbl[i];
+
+		if (!encl_ioc_add_pages(encl, seg))
+			return false;
+	}
+
+	ioc.sigstruct = (uint64_t)&encl->sigstruct;
+	ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
+	if (ret) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n",
+			errno);
+		return false;
+	}
+
+	return true;
+}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
new file mode 100644
index 000000000000..724cec700926
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "defines.h"
+#include "main.h"
+#include "../kselftest.h"
+
+static const uint64_t MAGIC = 0x1122334455667788ULL;
+vdso_sgx_enter_enclave_t eenter;
+
+struct vdso_symtab {
+	Elf64_Sym *elf_symtab;
+	const char *elf_symstrtab;
+	Elf64_Word *elf_hashtab;
+};
+
+static void *vdso_get_base_addr(char *envp[])
+{
+	Elf64_auxv_t *auxv;
+	int i;
+
+	for (i = 0; envp[i]; i++)
+		;
+
+	auxv = (Elf64_auxv_t *)&envp[i + 1];
+
+	for (i = 0; auxv[i].a_type != AT_NULL; i++) {
+		if (auxv[i].a_type == AT_SYSINFO_EHDR)
+			return (void *)auxv[i].a_un.a_val;
+	}
+
+	return NULL;
+}
+
+static Elf64_Dyn *vdso_get_dyntab(void *addr)
+{
+	Elf64_Ehdr *ehdr = addr;
+	Elf64_Phdr *phdrtab = addr + ehdr->e_phoff;
+	int i;
+
+	for (i = 0; i < ehdr->e_phnum; i++)
+		if (phdrtab[i].p_type == PT_DYNAMIC)
+			return addr + phdrtab[i].p_offset;
+
+	return NULL;
+}
+
+static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag)
+{
+	int i;
+
+	for (i = 0; dyntab[i].d_tag != DT_NULL; i++)
+		if (dyntab[i].d_tag == tag)
+			return addr + dyntab[i].d_un.d_ptr;
+
+	return NULL;
+}
+
+static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab)
+{
+	Elf64_Dyn *dyntab = vdso_get_dyntab(addr);
+
+	symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB);
+	if (!symtab->elf_symtab)
+		return false;
+
+	symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB);
+	if (!symtab->elf_symstrtab)
+		return false;
+
+	symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH);
+	if (!symtab->elf_hashtab)
+		return false;
+
+	return true;
+}
+
+static unsigned long elf_sym_hash(const char *name)
+{
+	unsigned long h = 0, high;
+
+	while (*name) {
+		h = (h << 4) + *name++;
+		high = h & 0xf0000000;
+
+		if (high)
+			h ^= high >> 24;
+
+		h &= ~high;
+	}
+
+	return h;
+}
+
+static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name)
+{
+	Elf64_Word bucketnum = symtab->elf_hashtab[0];
+	Elf64_Word *buckettab = &symtab->elf_hashtab[2];
+	Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum];
+	Elf64_Sym *sym;
+	Elf64_Word i;
+
+	for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF;
+	     i = chaintab[i]) {
+		sym = &symtab->elf_symtab[i];
+		if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name]))
+			return sym;
+	}
+
+	return NULL;
+}
+
+bool report_results(struct sgx_enclave_run *run, int ret, uint64_t result,
+		  const char *test)
+{
+	bool valid = true;
+
+	if (ret) {
+		printf("FAIL: %s() returned: %d\n", test, ret);
+		valid = false;
+	}
+
+	if (run->function != EEXIT) {
+		printf("FAIL: %s() function, expected: %u, got: %u\n", test, EEXIT,
+		       run->function);
+		valid = false;
+	}
+
+	if (result != MAGIC) {
+		printf("FAIL: %s(), expected: 0x%lx, got: 0x%lx\n", test, MAGIC,
+		       result);
+		valid = false;
+	}
+
+	if (run->user_data) {
+		printf("FAIL: %s() user data, expected: 0x0, got: 0x%llx\n",
+		       test, run->user_data);
+		valid = false;
+	}
+
+	return valid;
+}
+
+static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9,
+			struct sgx_enclave_run *run)
+{
+	run->user_data = 0;
+	return 0;
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+	struct sgx_enclave_run run;
+	struct vdso_symtab symtab;
+	Elf64_Sym *eenter_sym;
+	uint64_t result = 0;
+	struct encl encl;
+	unsigned int i;
+	void *addr;
+	int ret;
+
+	memset(&run, 0, sizeof(run));
+
+	if (!encl_load("test_encl.elf", &encl)) {
+		encl_delete(&encl);
+		ksft_exit_skip("cannot load enclaves\n");
+	}
+
+	if (!encl_measure(&encl))
+		goto err;
+
+	if (!encl_build(&encl))
+		goto err;
+
+	/*
+	 * An enclave consumer only must do this.
+	 */
+	for (i = 0; i < encl.nr_segments; i++) {
+		struct encl_segment *seg = &encl.segment_tbl[i];
+
+		addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
+			    seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
+		if (addr == MAP_FAILED) {
+			fprintf(stderr, "mmap() failed, errno=%d.\n", errno);
+			exit(KSFT_FAIL);
+		}
+	}
+
+	memset(&run, 0, sizeof(run));
+	run.tcs = encl.encl_base;
+
+	addr = vdso_get_base_addr(envp);
+	if (!addr)
+		goto err;
+
+	if (!vdso_get_symtab(addr, &symtab))
+		goto err;
+
+	eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave");
+	if (!eenter_sym)
+		goto err;
+
+	eenter = addr + eenter_sym->st_value;
+
+	ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run);
+	if (!report_results(&run, ret, result, "sgx_call_vdso"))
+		goto err;
+
+
+	/* Invoke the vDSO directly. */
+	result = 0;
+	ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
+		     0, 0, &run);
+	if (!report_results(&run, ret, result, "eenter"))
+		goto err;
+
+	/* And with an exit handler. */
+	run.user_handler = (__u64)user_handler;
+	run.user_data = 0xdeadbeef;
+	ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
+		     0, 0, &run);
+	if (!report_results(&run, ret, result, "user_handler"))
+		goto err;
+
+	printf("SUCCESS\n");
+	encl_delete(&encl);
+	exit(KSFT_PASS);
+
+err:
+	encl_delete(&encl);
+	exit(KSFT_FAIL);
+}
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
new file mode 100644
index 000000000000..45e6ab65442a
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef MAIN_H
+#define MAIN_H
+
+struct encl_segment {
+	off_t offset;
+	size_t size;
+	unsigned int prot;
+	unsigned int flags;
+};
+
+struct encl {
+	int fd;
+	void *bin;
+	off_t bin_size;
+	void *src;
+	size_t src_size;
+	size_t encl_size;
+	off_t encl_base;
+	unsigned int nr_segments;
+	struct encl_segment *segment_tbl;
+	struct sgx_secs secs;
+	struct sgx_sigstruct sigstruct;
+};
+
+void encl_delete(struct encl *ctx);
+bool encl_load(const char *path, struct encl *encl);
+bool encl_measure(struct encl *encl);
+bool encl_build(struct encl *encl);
+
+int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
+		  struct sgx_enclave_run *run);
+
+#endif /* MAIN_H */
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
new file mode 100644
index 000000000000..cc06f108bae7
--- /dev/null
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <openssl/err.h>
+#include <openssl/pem.h>
+#include "defines.h"
+#include "main.h"
+
+struct q1q2_ctx {
+	BN_CTX *bn_ctx;
+	BIGNUM *m;
+	BIGNUM *s;
+	BIGNUM *q1;
+	BIGNUM *qr;
+	BIGNUM *q2;
+};
+
+static void free_q1q2_ctx(struct q1q2_ctx *ctx)
+{
+	BN_CTX_free(ctx->bn_ctx);
+	BN_free(ctx->m);
+	BN_free(ctx->s);
+	BN_free(ctx->q1);
+	BN_free(ctx->qr);
+	BN_free(ctx->q2);
+}
+
+static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m,
+			   struct q1q2_ctx *ctx)
+{
+	ctx->bn_ctx = BN_CTX_new();
+	ctx->s = BN_bin2bn(s, SGX_MODULUS_SIZE, NULL);
+	ctx->m = BN_bin2bn(m, SGX_MODULUS_SIZE, NULL);
+	ctx->q1 = BN_new();
+	ctx->qr = BN_new();
+	ctx->q2 = BN_new();
+
+	if (!ctx->bn_ctx || !ctx->s || !ctx->m || !ctx->q1 || !ctx->qr ||
+	    !ctx->q2) {
+		free_q1q2_ctx(ctx);
+		return false;
+	}
+
+	return true;
+}
+
+static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
+		      uint8_t *q2)
+{
+	struct q1q2_ctx ctx;
+
+	if (!alloc_q1q2_ctx(s, m, &ctx)) {
+		fprintf(stderr, "Not enough memory for Q1Q2 calculation\n");
+		return false;
+	}
+
+	if (!BN_mul(ctx.q1, ctx.s, ctx.s, ctx.bn_ctx))
+		goto out;
+
+	if (!BN_div(ctx.q1, ctx.qr, ctx.q1, ctx.m, ctx.bn_ctx))
+		goto out;
+
+	if (BN_num_bytes(ctx.q1) > SGX_MODULUS_SIZE) {
+		fprintf(stderr, "Too large Q1 %d bytes\n",
+			BN_num_bytes(ctx.q1));
+		goto out;
+	}
+
+	if (!BN_mul(ctx.q2, ctx.s, ctx.qr, ctx.bn_ctx))
+		goto out;
+
+	if (!BN_div(ctx.q2, NULL, ctx.q2, ctx.m, ctx.bn_ctx))
+		goto out;
+
+	if (BN_num_bytes(ctx.q2) > SGX_MODULUS_SIZE) {
+		fprintf(stderr, "Too large Q2 %d bytes\n",
+			BN_num_bytes(ctx.q2));
+		goto out;
+	}
+
+	BN_bn2bin(ctx.q1, q1);
+	BN_bn2bin(ctx.q2, q2);
+
+	free_q1q2_ctx(&ctx);
+	return true;
+out:
+	free_q1q2_ctx(&ctx);
+	return false;
+}
+
+struct sgx_sigstruct_payload {
+	struct sgx_sigstruct_header header;
+	struct sgx_sigstruct_body body;
+};
+
+static bool check_crypto_errors(void)
+{
+	int err;
+	bool had_errors = false;
+	const char *filename;
+	int line;
+	char str[256];
+
+	for ( ; ; ) {
+		if (ERR_peek_error() == 0)
+			break;
+
+		had_errors = true;
+		err = ERR_get_error_line(&filename, &line);
+		ERR_error_string_n(err, str, sizeof(str));
+		fprintf(stderr, "crypto: %s: %s:%d\n", str, filename, line);
+	}
+
+	return had_errors;
+}
+
+static inline const BIGNUM *get_modulus(RSA *key)
+{
+	const BIGNUM *n;
+
+	RSA_get0_key(key, &n, NULL, NULL);
+	return n;
+}
+
+static RSA *gen_sign_key(void)
+{
+	BIGNUM *e;
+	RSA *key;
+	int ret;
+
+	e = BN_new();
+	key = RSA_new();
+
+	if (!e || !key)
+		goto err;
+
+	ret = BN_set_word(e, RSA_3);
+	if (ret != 1)
+		goto err;
+
+	ret = RSA_generate_key_ex(key, 3072, e, NULL);
+	if (ret != 1)
+		goto err;
+
+	BN_free(e);
+
+	return key;
+
+err:
+	RSA_free(key);
+	BN_free(e);
+
+	return NULL;
+}
+
+static void reverse_bytes(void *data, int length)
+{
+	int i = 0;
+	int j = length - 1;
+	uint8_t temp;
+	uint8_t *ptr = data;
+
+	while (i < j) {
+		temp = ptr[i];
+		ptr[i] = ptr[j];
+		ptr[j] = temp;
+		i++;
+		j--;
+	}
+}
+
+enum mrtags {
+	MRECREATE = 0x0045544145524345,
+	MREADD = 0x0000000044444145,
+	MREEXTEND = 0x00444E4554584545,
+};
+
+static bool mrenclave_update(EVP_MD_CTX *ctx, const void *data)
+{
+	if (!EVP_DigestUpdate(ctx, data, 64)) {
+		fprintf(stderr, "digest update failed\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool mrenclave_commit(EVP_MD_CTX *ctx, uint8_t *mrenclave)
+{
+	unsigned int size;
+
+	if (!EVP_DigestFinal_ex(ctx, (unsigned char *)mrenclave, &size)) {
+		fprintf(stderr, "digest commit failed\n");
+		return false;
+	}
+
+	if (size != 32) {
+		fprintf(stderr, "invalid digest size = %u\n", size);
+		return false;
+	}
+
+	return true;
+}
+
+struct mrecreate {
+	uint64_t tag;
+	uint32_t ssaframesize;
+	uint64_t size;
+	uint8_t reserved[44];
+} __attribute__((__packed__));
+
+
+static bool mrenclave_ecreate(EVP_MD_CTX *ctx, uint64_t blob_size)
+{
+	struct mrecreate mrecreate;
+	uint64_t encl_size;
+
+	for (encl_size = 0x1000; encl_size < blob_size; )
+		encl_size <<= 1;
+
+	memset(&mrecreate, 0, sizeof(mrecreate));
+	mrecreate.tag = MRECREATE;
+	mrecreate.ssaframesize = 1;
+	mrecreate.size = encl_size;
+
+	if (!EVP_DigestInit_ex(ctx, EVP_sha256(), NULL))
+		return false;
+
+	return mrenclave_update(ctx, &mrecreate);
+}
+
+struct mreadd {
+	uint64_t tag;
+	uint64_t offset;
+	uint64_t flags; /* SECINFO flags */
+	uint8_t reserved[40];
+} __attribute__((__packed__));
+
+static bool mrenclave_eadd(EVP_MD_CTX *ctx, uint64_t offset, uint64_t flags)
+{
+	struct mreadd mreadd;
+
+	memset(&mreadd, 0, sizeof(mreadd));
+	mreadd.tag = MREADD;
+	mreadd.offset = offset;
+	mreadd.flags = flags;
+
+	return mrenclave_update(ctx, &mreadd);
+}
+
+struct mreextend {
+	uint64_t tag;
+	uint64_t offset;
+	uint8_t reserved[48];
+} __attribute__((__packed__));
+
+static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset,
+			      const uint8_t *data)
+{
+	struct mreextend mreextend;
+	int i;
+
+	for (i = 0; i < 0x1000; i += 0x100) {
+		memset(&mreextend, 0, sizeof(mreextend));
+		mreextend.tag = MREEXTEND;
+		mreextend.offset = offset + i;
+
+		if (!mrenclave_update(ctx, &mreextend))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x00]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x40]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x80]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0xC0]))
+			return false;
+	}
+
+	return true;
+}
+
+static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl,
+			      struct encl_segment *seg)
+{
+	uint64_t end = seg->offset + seg->size;
+	uint64_t offset;
+
+	for (offset = seg->offset; offset < end; offset += PAGE_SIZE) {
+		if (!mrenclave_eadd(ctx, offset, seg->flags))
+			return false;
+
+		if (!mrenclave_eextend(ctx, offset, encl->src + offset))
+			return false;
+	}
+
+	return true;
+}
+
+bool encl_measure(struct encl *encl)
+{
+	uint64_t header1[2] = {0x000000E100000006, 0x0000000000010000};
+	uint64_t header2[2] = {0x0000006000000101, 0x0000000100000060};
+	struct sgx_sigstruct *sigstruct = &encl->sigstruct;
+	struct sgx_sigstruct_payload payload;
+	uint8_t digest[SHA256_DIGEST_LENGTH];
+	unsigned int siglen;
+	RSA *key = NULL;
+	EVP_MD_CTX *ctx;
+	int i;
+
+	memset(sigstruct, 0, sizeof(*sigstruct));
+
+	sigstruct->header.header1[0] = header1[0];
+	sigstruct->header.header1[1] = header1[1];
+	sigstruct->header.header2[0] = header2[0];
+	sigstruct->header.header2[1] = header2[1];
+	sigstruct->exponent = 3;
+	sigstruct->body.attributes = SGX_ATTR_MODE64BIT;
+	sigstruct->body.xfrm = 3;
+
+	/* sanity check */
+	if (check_crypto_errors())
+		goto err;
+
+	key = gen_sign_key();
+	if (!key)
+		goto err;
+
+	BN_bn2bin(get_modulus(key), sigstruct->modulus);
+
+	ctx = EVP_MD_CTX_create();
+	if (!ctx)
+		goto err;
+
+	if (!mrenclave_ecreate(ctx, encl->src_size))
+		goto err;
+
+	for (i = 0; i < encl->nr_segments; i++) {
+		struct encl_segment *seg = &encl->segment_tbl[i];
+
+		if (!mrenclave_segment(ctx, encl, seg))
+			goto err;
+	}
+
+	if (!mrenclave_commit(ctx, sigstruct->body.mrenclave))
+		goto err;
+
+	memcpy(&payload.header, &sigstruct->header, sizeof(sigstruct->header));
+	memcpy(&payload.body, &sigstruct->body, sizeof(sigstruct->body));
+
+	SHA256((unsigned char *)&payload, sizeof(payload), digest);
+
+	if (!RSA_sign(NID_sha256, digest, SHA256_DIGEST_LENGTH,
+		      sigstruct->signature, &siglen, key))
+		goto err;
+
+	if (!calc_q1q2(sigstruct->signature, sigstruct->modulus, sigstruct->q1,
+		       sigstruct->q2))
+		goto err;
+
+	/* BE -> LE */
+	reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE);
+
+	EVP_MD_CTX_destroy(ctx);
+	RSA_free(key);
+	return true;
+
+err:
+	EVP_MD_CTX_destroy(ctx);
+	RSA_free(key);
+	return false;
+}
diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c
new file mode 100644
index 000000000000..cf25b5dc1e03
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <stddef.h>
+#include "defines.h"
+
+static void *memcpy(void *dest, const void *src, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		((char *)dest)[i] = ((char *)src)[i];
+
+	return dest;
+}
+
+void encl_body(void *rdi, void *rsi)
+{
+	memcpy(rsi, rdi, 8);
+}
diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds
new file mode 100644
index 000000000000..0fbbda7e665e
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.lds
@@ -0,0 +1,40 @@
+OUTPUT_FORMAT(elf64-x86-64)
+
+PHDRS
+{
+	tcs PT_LOAD;
+	text PT_LOAD;
+	data PT_LOAD;
+}
+
+SECTIONS
+{
+	. = 0;
+	.tcs : {
+		*(.tcs*)
+	} : tcs
+
+	. = ALIGN(4096);
+	.text : {
+		*(.text*)
+		*(.rodata*)
+	} : text
+
+	. = ALIGN(4096);
+	.data : {
+		*(.data*)
+	} : data
+
+	/DISCARD/ : {
+		*(.comment*)
+		*(.note*)
+		*(.debug*)
+		*(.eh_frame*)
+	}
+}
+
+ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves")
diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S
new file mode 100644
index 000000000000..5d5680d4ea39
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+	.macro ENCLU
+	.byte 0x0f, 0x01, 0xd7
+	.endm
+
+	.section ".tcs", "aw"
+	.balign	4096
+
+	.fill	1, 8, 0			# STATE (set by CPU)
+	.fill	1, 8, 0			# FLAGS
+	.quad	encl_ssa		# OSSA
+	.fill	1, 4, 0			# CSSA (set by CPU)
+	.fill	1, 4, 1			# NSSA
+	.quad	encl_entry		# OENTRY
+	.fill	1, 8, 0			# AEP (set by EENTER and ERESUME)
+	.fill	1, 8, 0			# OFSBASE
+	.fill	1, 8, 0			# OGSBASE
+	.fill	1, 4, 0xFFFFFFFF 	# FSLIMIT
+	.fill	1, 4, 0xFFFFFFFF	# GSLIMIT
+	.fill	4024, 1, 0		# Reserved
+
+	# Identical to the previous TCS.
+	.fill	1, 8, 0			# STATE (set by CPU)
+	.fill	1, 8, 0			# FLAGS
+	.quad	encl_ssa		# OSSA
+	.fill	1, 4, 0			# CSSA (set by CPU)
+	.fill	1, 4, 1			# NSSA
+	.quad	encl_entry		# OENTRY
+	.fill	1, 8, 0			# AEP (set by EENTER and ERESUME)
+	.fill	1, 8, 0			# OFSBASE
+	.fill	1, 8, 0			# OGSBASE
+	.fill	1, 4, 0xFFFFFFFF 	# FSLIMIT
+	.fill	1, 4, 0xFFFFFFFF	# GSLIMIT
+	.fill	4024, 1, 0		# Reserved
+
+	.text
+
+encl_entry:
+	# RBX contains the base address for TCS, which is also the first address
+	# inside the enclave. By adding the value of le_stack_end to it, we get
+	# the absolute address for the stack.
+	lea	(encl_stack)(%rbx), %rax
+	xchg	%rsp, %rax
+	push	%rax
+
+	push	%rcx # push the address after EENTER
+	push	%rbx # push the enclave base address
+
+	call	encl_body
+
+	pop	%rbx # pop the enclave base address
+
+	/* Clear volatile GPRs, except RAX (EEXIT function). */
+	xor     %rcx, %rcx
+	xor     %rdx, %rdx
+	xor     %rdi, %rdi
+	xor     %rsi, %rsi
+	xor     %r8, %r8
+	xor     %r9, %r9
+	xor     %r10, %r10
+	xor     %r11, %r11
+
+	# Reset status flags.
+	add     %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1
+
+	# Prepare EEXIT target by popping the address of the instruction after
+	# EENTER to RBX.
+	pop	%rbx
+
+	# Restore the caller stack.
+	pop	%rax
+	mov	%rax, %rsp
+
+	# EEXIT
+	mov	$4, %rax
+	enclu
+
+	.section ".data", "aw"
+
+encl_ssa:
+	.space 4096
+
+	.balign 4096
+	.space 8192
+encl_stack:
-- 
cgit v1.2.3


From 0eaa8d153a1d573e53b8283c90db44057d1376f6 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 18 Nov 2020 19:06:40 +0200
Subject: selftests/sgx: Use a statically generated 3072-bit RSA key

Use a statically generated key for signing the enclave, because
generating keys on the fly can eat the kernel entropy pool. Another
good reason for doing this is predictable builds. The RSA has been
arbitrarily selected. It's contents do not matter.

This also makes the selftest execute a lot quicker instead of the delay
that it had before (because of slow key generation).

 [ bp: Disambiguate "static key" which means something else in the
   kernel, fix typos. ]

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: linux-kselftest@vger.kernel.org
Link: https://lkml.kernel.org/r/20201118170640.39629-1-jarkko@kernel.org
---
 tools/testing/selftests/sgx/Makefile     |  6 ++++-
 tools/testing/selftests/sgx/main.h       |  3 +++
 tools/testing/selftests/sgx/sign_key.S   | 12 ++++++++++
 tools/testing/selftests/sgx/sign_key.pem | 39 ++++++++++++++++++++++++++++++++
 tools/testing/selftests/sgx/sigstruct.c  | 34 ++++++++++------------------
 5 files changed, 71 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/sgx/sign_key.S
 create mode 100644 tools/testing/selftests/sgx/sign_key.pem

(limited to 'tools')

diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index d51c90663943..7f12d55b97f8 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -25,7 +25,8 @@ endif
 $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
 		    $(OUTPUT)/load.o \
 		    $(OUTPUT)/sigstruct.o \
-		    $(OUTPUT)/call.o
+		    $(OUTPUT)/call.o \
+		    $(OUTPUT)/sign_key.o
 	$(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto
 
 $(OUTPUT)/main.o: main.c
@@ -40,6 +41,9 @@ $(OUTPUT)/sigstruct.o: sigstruct.c
 $(OUTPUT)/call.o: call.S
 	$(CC) $(HOST_CFLAGS) -c $< -o $@
 
+$(OUTPUT)/sign_key.o: sign_key.S
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
 $(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
 	$(CC) $(ENCL_CFLAGS) -T $^ -o $@
 
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index 45e6ab65442a..67211a708f04 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -27,6 +27,9 @@ struct encl {
 	struct sgx_sigstruct sigstruct;
 };
 
+extern unsigned char sign_key[];
+extern unsigned char sign_key_end[];
+
 void encl_delete(struct encl *ctx);
 bool encl_load(const char *path, struct encl *encl);
 bool encl_measure(struct encl *encl);
diff --git a/tools/testing/selftests/sgx/sign_key.S b/tools/testing/selftests/sgx/sign_key.S
new file mode 100644
index 000000000000..e4fbe948444a
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+    .section ".rodata", "a"
+
+sign_key:
+    .globl sign_key
+    .incbin "sign_key.pem"
+sign_key_end:
+    .globl sign_key_end
diff --git a/tools/testing/selftests/sgx/sign_key.pem b/tools/testing/selftests/sgx/sign_key.pem
new file mode 100644
index 000000000000..d76f21f19187
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.pem
@@ -0,0 +1,39 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIG4wIBAAKCAYEApalGbq7Q+usM91CPtksu3D+b0Prc8gAFL6grM3mg85A5Bx8V
+cfMXPgtrw8EYFwQxDAvzZWwl+9VfOX0ECrFRBkOHcOiG0SnADN8+FLj1UiNUQwbp
+S6OzhNWuRcSbGraSOyUlVlV0yMQSvewyzGklOaXBe30AJqzIBc8QfdSxKuP8rs0Z
+ga6k/Bl73osrYKByILJTUUeZqjLERsE6GebsdzbWgKn8qVqng4ZS4yMNg6LeRlH3
++9CIPgg4jwpSLHcp7dq2qTIB9a0tGe9ayp+5FbucpB6U7ePold0EeRN6RlJGDF9k
+L93v8P5ykz5G5gYZ2g0K1X2sHIWV4huxPgv5PXgdyQYbK+6olqj0d5rjYuwX57Ul
+k6SroPS1U6UbdCjG5txM+BNGU0VpD0ZhrIRw0leQdnNcCO9sTJuInZrgYacSVJ7u
+mtB+uCt+uzUesc+l+xPRYA+9e14lLkZp7AAmo9FvL816XDI09deehJ3i/LmHKCRN
+tuqC5TprRjFwUr6dAgEDAoIBgG5w2Z8fNfycs0+LCnmHdJLVEotR6KFVWMpwHMz7
+wKJgJgS/Y6FMuilc8oKAuroCy11dTO5IGVKOP3uorVx2NgQtBPXwWeDGgAiU1A3Q
+o4wXjYIEm4fCd63jyYPYZ2ckYXzDbjmOTdstYdPyzIhGGNEZK6eoqsRzMAPfYFPj
+IMdCqHSIu6vJw1K7p+myHOsVoWshjODaZnF3LYSA0WaZ8vokjwBxUxuRxQJZjJds
+s60XPtmL+qfgWtQFewoG4XL6GuD8FcXccynRRtzrLtFNPIl9BQfWfjBBhTC1/Te1
+0Z6XbZvpdUTD9OfLB7SbR2OUFNpKQgriO0iYVdbW3cr7uu38Zwp4W1TX73DPjoi6
+KNooP6SGWd4mRJW2+dUmSYS4QNG8eVVZswKcploEIXlAKRsOe4kzJJ1iETugIe85
+uX8nd1WYEp65xwoRUg8hqng0MeyveVbXqNKuJG6tzNDt9kgFYo+hmC/oouAW2Dtc
+T9jdRAwKJXqA2Eg6OkgXCEv+kwKBwQDYaQiFMlFhsmLlqI+EzCUh7c941/cL7m6U
+7j98+8ngl0HgCEcrc10iJVCKakQW3YbPzAx3XkKTaGjWazvvrFarXIGlOud64B8a
+iWyQ7VdlnmZnNEdk+C83tI91OQeaTKqRLDGzKh29Ry/jL8Pcbazt+kDgxa0H7qJp
+roADUanLQuNkYubpbhFBh3xpa2EExaVq6rF7nIVsD8W9TrbmPKA4LgH7z0iy544D
+kVCNYsTjYDdUWP+WiSor8kCnnpjnN9sCgcEAw/eNezUD1UDf6OYFC9+5JZJFn4Tg
+mZMyN93JKIb199ffwnjtHUSjcyiWeesXucpzwtGbTcwQnDisSW4oneYKLSEBlBaq
+scqiUugyGZZOthFSCbdXYXMViK2vHrKlkse7GxVlROKcEhM/pRBrmjaGO8eWR+D4
+FO2wCXzVs3KgV6j779frw0vC54oHOxc9+Lu1rSHp4i+600koyvL/zF6U/5tZXIvN
+YW2yoiQJnjCmVA1pwbwV6KAUTPDTMnBK+YjnAoHBAJBGBa4hi5Z27JkbCliIGMFJ
+NPs6pLKe9GNJf6in2+sPgUAFhMeiPhbDiwbxgrnpBIqICE+ULGJFmzmc0p/IOceT
+ARjR76dAFLxbnbXzj5kURETNhO36yiUjCk4mBRGIcbYddndxaSjaH+zKgpLzyJ6m
+1esuc1qfFvEfAAI2cTIsl5hB70ZJYNZaUvDyQK3ZGPHxy6e9rkgKg9OJz0QoatAe
+q/002yHvtAJg4F5B2JeVejg7VQ8GHB1MKxppu0TP5wKBwQCCpQj8zgKOKz/wmViy
+lSYZDC5qWJW7t3bP6TDFr06lOpUsUJ4TgxeiGw778g/RMaKB4RIz3WBoJcgw9BsT
+7rFza1ZiucchMcGMmswRDt8kC4wGejpA92Owc8oUdxkMhSdnY5jYlxK2t3/DYEe8
+JFl9L7mFQKVjSSAGUzkiTGrlG1Kf5UfXh9dFBq98uilQfSPIwUaWynyM23CHTKqI
+Pw3/vOY9sojrnncWwrEUIG7is5vWfWPwargzSzd29YdRBe8CgcEAuRVewK/YeNOX
+B7ZG6gKKsfsvrGtY7FPETzLZAHjoVXYNea4LVZ2kn4hBXXlvw/4HD+YqcTt4wmif
+5JQlDvjNobUiKJZpzy7hklVhF7wZFl4pCF7Yh43q9iQ7gKTaeUG7MiaK+G8Zz8aY
+HW9rsiihbdZkccMvnPfO9334XMxl3HtBRzLstjUlbLB7Sdh+7tZ3JQidCOFNs5pE
+XyWwnASPu4tKfDahH1UUTp1uJcq/6716CSWg080avYxFcn75qqsb
+-----END RSA PRIVATE KEY-----
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index cc06f108bae7..dee7a3d6c5a5 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -135,33 +135,21 @@ static inline const BIGNUM *get_modulus(RSA *key)
 
 static RSA *gen_sign_key(void)
 {
-	BIGNUM *e;
+	unsigned long sign_key_length;
+	BIO *bio;
 	RSA *key;
-	int ret;
 
-	e = BN_new();
-	key = RSA_new();
+	sign_key_length = (unsigned long)&sign_key_end -
+			  (unsigned long)&sign_key;
 
-	if (!e || !key)
-		goto err;
-
-	ret = BN_set_word(e, RSA_3);
-	if (ret != 1)
-		goto err;
-
-	ret = RSA_generate_key_ex(key, 3072, e, NULL);
-	if (ret != 1)
-		goto err;
+	bio = BIO_new_mem_buf(&sign_key, sign_key_length);
+	if (!bio)
+		return NULL;
 
-	BN_free(e);
+	key = PEM_read_bio_RSAPrivateKey(bio, NULL, NULL, NULL);
+	BIO_free(bio);
 
 	return key;
-
-err:
-	RSA_free(key);
-	BN_free(e);
-
-	return NULL;
 }
 
 static void reverse_bytes(void *data, int length)
@@ -339,8 +327,10 @@ bool encl_measure(struct encl *encl)
 		goto err;
 
 	key = gen_sign_key();
-	if (!key)
+	if (!key) {
+		ERR_print_errors_fp(stdout);
 		goto err;
+	}
 
 	BN_bn2bin(get_modulus(key), sigstruct->modulus);
 
-- 
cgit v1.2.3


From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Tue, 17 Nov 2020 18:45:49 +0000
Subject: bpf: Add bpf_ktime_get_coarse_ns helper

The helper uses CLOCK_MONOTONIC_COARSE source of time that is less
accurate but more performant.

We have a BPF CGROUP_SKB firewall that supports event logging through
bpf_perf_event_output(). Each event has a timestamp and currently we use
bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20
ns in time required for event logging.

bpf_ktime_get_ns():
EgressLogByRemoteEndpoint                              113.82ns    8.79M

bpf_ktime_get_coarse_ns():
EgressLogByRemoteEndpoint                               95.40ns   10.48M

Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru
---
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a52299b80b9d..3ca6146f001a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3797,6 +3797,16 @@ union bpf_attr {
  *		is cleared if the flag is not specified.
  *	Return
  *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ * 	Description
+ * 		Return a coarse-grained version of the time elapsed since
+ * 		system boot, in nanoseconds. Does not include time the system
+ * 		was suspended.
+ *
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3959,6 +3969,7 @@ union bpf_attr {
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
+	FN(ktime_get_coarse_ns),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 6016df8fe874e1cf36f6357d71438b384198ce06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:38 +0100
Subject: selftests/bpf: Fix broken riscv build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The selftests/bpf Makefile includes system include directories from
the host, when building BPF programs. On RISC-V glibc requires that
__riscv_xlen is defined. This is not the case for "clang -target bpf",
which messes up __WORDSIZE (errno.h -> ... -> wordsize.h) and breaks
the build.

By explicitly defining __risc_xlen correctly for riscv, we can
workaround this.

Fixes: 167381f3eac0 ("selftests/bpf: Makefile fix "missing" headers on build with -idirafter")
Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-2-bjorn.topel@gmail.com
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c1708ffa6b1c..3d5940cd110d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -219,7 +219,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids	\
 # build would have failed anyways.
 define get_sys_includes
 $(shell $(1) -v -E - </dev/null 2>&1 \
-	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
 endef
 
 # Determine target endianness.
-- 
cgit v1.2.3


From c77b0589ca29ad1859fe7d7c1ecd63c0632379fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:39 +0100
Subject: selftests/bpf: Avoid running unprivileged tests with alignment
 requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some architectures have strict alignment requirements. In that case,
the BPF verifier detects if a program has unaligned accesses and
rejects them. A user can pass BPF_F_ANY_ALIGNMENT to a program to
override this check. That, however, will only work when a privileged
user loads a program. An unprivileged user loading a program with this
flag will be rejected prior entering the verifier.

Hence, it does not make sense to load unprivileged programs without
strict alignment when testing the verifier. This patch avoids exactly
that.

Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-3-bjorn.topel@gmail.com
---
 tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 9be395d9dc64..4bfe3aa2cfc4 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1152,6 +1152,19 @@ static void get_unpriv_disabled()
 
 static bool test_as_unpriv(struct bpf_test *test)
 {
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	/* Some architectures have strict alignment requirements. In
+	 * that case, the BPF verifier detects if a program has
+	 * unaligned accesses and rejects them. A user can pass
+	 * BPF_F_ANY_ALIGNMENT to a program to override this
+	 * check. That, however, will only work when a privileged user
+	 * loads a program. An unprivileged user loading a program
+	 * with this flag will be rejected prior entering the
+	 * verifier.
+	 */
+	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+		return false;
+#endif
 	return !test->prog_type ||
 	       test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
 	       test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
-- 
cgit v1.2.3


From 6007b23cc7555df882be870433dc589841d4eb06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:40 +0100
Subject: selftests/bpf: Mark tests that require unaligned memory access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A lot of tests require unaligned memory access to work. Mark the tests
as such, so that they can be avoided on unsupported architectures such
as RISC-V.

Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-4-bjorn.topel@gmail.com
---
 .../testing/selftests/bpf/verifier/ctx_sk_lookup.c |  7 ++++
 .../selftests/bpf/verifier/direct_value_access.c   |  3 ++
 tools/testing/selftests/bpf/verifier/map_ptr.c     |  1 +
 .../selftests/bpf/verifier/raw_tp_writable.c       |  1 +
 .../testing/selftests/bpf/verifier/ref_tracking.c  |  4 ++
 tools/testing/selftests/bpf/verifier/regalloc.c    |  8 ++++
 tools/testing/selftests/bpf/verifier/wide_access.c | 46 +++++++++++++---------
 7 files changed, 52 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index 2ad5f974451c..fb13ca2d5606 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -266,6 +266,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
@@ -292,6 +293,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup remote_port field",
@@ -305,6 +307,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup local_ip4 field",
@@ -331,6 +334,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup local_port field",
@@ -344,6 +348,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
 {
@@ -410,6 +415,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
@@ -422,6 +428,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 /* in-bound and out-of-bound writes to bpf_sk_lookup */
 {
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index 988f46a1a4c7..c0648dc009b5 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -69,6 +69,7 @@
 	.fixup_map_array_48b = { 1 },
 	.result = REJECT,
 	.errstr = "R1 min value is outside of the allowed memory range",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 7",
@@ -195,6 +196,7 @@
 	.fixup_map_array_48b = { 1, 3 },
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 17",
@@ -209,6 +211,7 @@
 	.fixup_map_array_48b = { 1, 3 },
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 18",
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index 637f9293bda8..b117bdd3806d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -44,6 +44,7 @@
 	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
 	.result = REJECT,
 	.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"bpf_map_ptr: read ops field accepted",
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
index 95b5d70a1dc1..2978fb5a769d 100644
--- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
+++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
@@ -31,4 +31,5 @@
 	.fixup_map_hash_8b = { 1, },
 	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	.errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 006b5bd99c08..3b6ee009c00b 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -675,6 +675,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_sk_fullsock() after release",
@@ -698,6 +699,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
@@ -725,6 +727,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use sk after bpf_sk_release(tp)",
@@ -747,6 +750,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
index 4ad7e05de706..bb0dd89dd212 100644
--- a/tools/testing/selftests/bpf/verifier/regalloc.c
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -21,6 +21,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc negative",
@@ -71,6 +72,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc src_reg negative",
@@ -97,6 +99,7 @@
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc and spill",
@@ -126,6 +129,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc and spill negative",
@@ -156,6 +160,7 @@
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=48 size=8",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc three regs",
@@ -182,6 +187,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc after call",
@@ -210,6 +216,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc in callee",
@@ -240,6 +247,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc, spill, JEQ",
diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c
index ccade9312d21..55af248efa93 100644
--- a/tools/testing/selftests/bpf/verifier/wide_access.c
+++ b/tools/testing/selftests/bpf/verifier/wide_access.c
@@ -1,4 +1,4 @@
-#define BPF_SOCK_ADDR_STORE(field, off, res, err) \
+#define BPF_SOCK_ADDR_STORE(field, off, res, err, flgs)	\
 { \
 	"wide store to bpf_sock_addr." #field "[" #off "]", \
 	.insns = { \
@@ -11,31 +11,36 @@
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
 	.errstr = err, \
+	.flags = flgs, \
 }
 
 /* user_ip6[0] is u64 aligned */
 BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT,
-		    "invalid bpf_context access off=12 size=8"),
+		    "invalid bpf_context access off=12 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT,
-		    "invalid bpf_context access off=20 size=8"),
+		    "invalid bpf_context access off=20 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 
 /* msg_src_ip6[0] is _not_ u64 aligned */
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT,
-		    "invalid bpf_context access off=44 size=8"),
+		    "invalid bpf_context access off=44 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT,
-		    "invalid bpf_context access off=52 size=8"),
+		    "invalid bpf_context access off=52 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
-		    "invalid bpf_context access off=56 size=8"),
+		    "invalid bpf_context access off=56 size=8", 0),
 
 #undef BPF_SOCK_ADDR_STORE
 
-#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \
+#define BPF_SOCK_ADDR_LOAD(field, off, res, err, flgs)	\
 { \
 	"wide load from bpf_sock_addr." #field "[" #off "]", \
 	.insns = { \
@@ -48,26 +53,31 @@ BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
 	.errstr = err, \
+	.flags = flgs, \
 }
 
 /* user_ip6[0] is u64 aligned */
 BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT,
-		   "invalid bpf_context access off=12 size=8"),
+		   "invalid bpf_context access off=12 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT,
-		   "invalid bpf_context access off=20 size=8"),
+		   "invalid bpf_context access off=20 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 
 /* msg_src_ip6[0] is _not_ u64 aligned */
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT,
-		   "invalid bpf_context access off=44 size=8"),
+		   "invalid bpf_context access off=44 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT,
-		   "invalid bpf_context access off=52 size=8"),
+		   "invalid bpf_context access off=52 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT,
-		   "invalid bpf_context access off=56 size=8"),
+		   "invalid bpf_context access off=56 size=8", 0),
 
 #undef BPF_SOCK_ADDR_LOAD
-- 
cgit v1.2.3


From a61ea6379ae9dbb63fbf022d1456733520db6be7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:22 +0900
Subject: tools/bootconfig: Fix errno reference after printf()

Fix not to refer the errno variable as the result of previous libc
functions after printf() because printf() can change the errno.

Link: https://lkml.kernel.org/r/160576520243.320071.51093664672431249.stgit@devnote2

Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 52 +++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index eb92027817a7..52eb2bbe8966 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -147,6 +147,12 @@ static int load_xbc_file(const char *path, char **buf)
 	return ret;
 }
 
+static int pr_errno(const char *msg, int err)
+{
+	pr_err("%s: %d\n", msg, err);
+	return err;
+}
+
 static int load_xbc_from_initrd(int fd, char **buf)
 {
 	struct stat stat;
@@ -162,26 +168,24 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN)
 		return 0;
 
-	if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+	if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+		return pr_errno("Failed to lseek for magic", -errno);
+
 	if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
-		return -errno;
+		return pr_errno("Failed to read", -errno);
+
 	/* Check the bootconfig magic bytes */
 	if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
 		return 0;
 
-	if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+	if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0)
+		return pr_errno("Failed to lseek for size", -errno);
 
 	if (read(fd, &size, sizeof(u32)) < 0)
-		return -errno;
+		return pr_errno("Failed to read size", -errno);
 
 	if (read(fd, &csum, sizeof(u32)) < 0)
-		return -errno;
+		return pr_errno("Failed to read checksum", -errno);
 
 	/* Wrong size error  */
 	if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
@@ -190,10 +194,8 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	}
 
 	if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN),
-		  SEEK_SET) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+		  SEEK_SET) < 0)
+		return pr_errno("Failed to lseek", -errno);
 
 	ret = load_xbc_fd(fd, buf, size);
 	if (ret < 0)
@@ -262,14 +264,16 @@ static int show_xbc(const char *path, bool list)
 
 	ret = stat(path, &st);
 	if (ret < 0) {
-		pr_err("Failed to stat %s: %d\n", path, -errno);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to stat %s: %d\n", path, ret);
+		return ret;
 	}
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		pr_err("Failed to open initrd %s: %d\n", path, fd);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to open initrd %s: %d\n", path, ret);
+		return ret;
 	}
 
 	ret = load_xbc_from_initrd(fd, &buf);
@@ -307,8 +311,9 @@ static int delete_xbc(const char *path)
 
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
-		pr_err("Failed to open initrd %s: %d\n", path, fd);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to open initrd %s: %d\n", path, ret);
+		return ret;
 	}
 
 	size = load_xbc_from_initrd(fd, &buf);
@@ -383,9 +388,10 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	/* Apply new one */
 	fd = open(path, O_RDWR | O_APPEND);
 	if (fd < 0) {
-		pr_err("Failed to open %s: %d\n", path, fd);
+		ret = -errno;
+		pr_err("Failed to open %s: %d\n", path, ret);
 		free(data);
-		return fd;
+		return ret;
 	}
 	/* TODO: Ensure the @path is initramfs/initrd image */
 	ret = write(fd, data, size + 8);
-- 
cgit v1.2.3


From a995e6bc0524450adfd6181dfdcd9d0520cfaba5 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:31 +0900
Subject: tools/bootconfig: Fix to check the write failure correctly

Fix to check the write(2) failure including partial write
correctly and try to rollback the partial write, because
if there is no BOOTCONFIG_MAGIC string, we can not remove it.

Link: https://lkml.kernel.org/r/160576521135.320071.3883101436675969998.stgit@devnote2

Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 52eb2bbe8966..a0733cbb3c49 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -337,6 +337,7 @@ static int delete_xbc(const char *path)
 
 static int apply_xbc(const char *path, const char *xbc_path)
 {
+	struct stat stat;
 	u32 size, csum;
 	char *buf, *data;
 	int ret, fd;
@@ -394,16 +395,26 @@ static int apply_xbc(const char *path, const char *xbc_path)
 		return ret;
 	}
 	/* TODO: Ensure the @path is initramfs/initrd image */
+	if (fstat(fd, &stat) < 0) {
+		pr_err("Failed to get the size of %s\n", path);
+		goto out;
+	}
 	ret = write(fd, data, size + 8);
-	if (ret < 0) {
+	if (ret < size + 8) {
+		if (ret < 0)
+			ret = -errno;
 		pr_err("Failed to apply a boot config: %d\n", ret);
-		goto out;
+		if (ret < 0)
+			goto out;
+		goto out_rollback;
 	}
 	/* Write a magic word of the bootconfig */
 	ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-	if (ret < 0) {
+	if (ret < BOOTCONFIG_MAGIC_LEN) {
+		if (ret < 0)
+			ret = -errno;
 		pr_err("Failed to apply a boot config magic: %d\n", ret);
-		goto out;
+		goto out_rollback;
 	}
 	ret = 0;
 out:
@@ -411,6 +422,17 @@ out:
 	free(data);
 
 	return ret;
+
+out_rollback:
+	/* Map the partial write to -ENOSPC */
+	if (ret >= 0)
+		ret = -ENOSPC;
+	if (ftruncate(fd, stat.st_size) < 0) {
+		ret = -errno;
+		pr_err("Failed to rollback the write error: %d\n", ret);
+		pr_err("The initrd %s may be corrupted. Recommend to rebuild.\n", path);
+	}
+	goto out;
 }
 
 static int usage(void)
-- 
cgit v1.2.3


From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:40 +0900
Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4

Align the bootconfig applied initrd image size to 4. To fill the gap,
the bootconfig command uses null characters in between the bootconfig
data and the footer. This will expands the footer size but don't change
the checksum.
Thus the block image of the initrd file with bootconfig is as follows.

[initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"]

Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c             | 57 ++++++++++++++++++++++---------------
 tools/bootconfig/test-bootconfig.sh |  6 +++-
 2 files changed, 39 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index a0733cbb3c49..4a445b6304bb 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -337,12 +337,13 @@ static int delete_xbc(const char *path)
 
 static int apply_xbc(const char *path, const char *xbc_path)
 {
+	char *buf, *data, *p;
+	size_t total_size;
 	struct stat stat;
+	const char *msg;
 	u32 size, csum;
-	char *buf, *data;
+	int pos, pad;
 	int ret, fd;
-	const char *msg;
-	int pos;
 
 	ret = load_xbc_file(xbc_path, &buf);
 	if (ret < 0) {
@@ -352,13 +353,12 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	size = strlen(buf) + 1;
 	csum = checksum((unsigned char *)buf, size);
 
-	/* Prepare xbc_path data */
-	data = malloc(size + 8);
+	/* Backup the bootconfig data */
+	data = calloc(size + BOOTCONFIG_ALIGN +
+		      sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1);
 	if (!data)
 		return -ENOMEM;
-	strcpy(data, buf);
-	*(u32 *)(data + size) = size;
-	*(u32 *)(data + size + 4) = csum;
+	memcpy(data, buf, size);
 
 	/* Check the data format */
 	ret = xbc_init(buf, &msg, &pos);
@@ -399,24 +399,35 @@ static int apply_xbc(const char *path, const char *xbc_path)
 		pr_err("Failed to get the size of %s\n", path);
 		goto out;
 	}
-	ret = write(fd, data, size + 8);
-	if (ret < size + 8) {
+
+	/* To align up the total size to BOOTCONFIG_ALIGN, get padding size */
+	total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN;
+	pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size;
+	size += pad;
+
+	/* Add a footer */
+	p = data + size;
+	*(u32 *)p = size;
+	p += sizeof(u32);
+
+	*(u32 *)p = csum;
+	p += sizeof(u32);
+
+	memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
+	p += BOOTCONFIG_MAGIC_LEN;
+
+	total_size = p - data;
+
+	ret = write(fd, data, total_size);
+	if (ret < total_size) {
 		if (ret < 0)
 			ret = -errno;
 		pr_err("Failed to apply a boot config: %d\n", ret);
-		if (ret < 0)
-			goto out;
-		goto out_rollback;
-	}
-	/* Write a magic word of the bootconfig */
-	ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-	if (ret < BOOTCONFIG_MAGIC_LEN) {
-		if (ret < 0)
-			ret = -errno;
-		pr_err("Failed to apply a boot config magic: %d\n", ret);
-		goto out_rollback;
-	}
-	ret = 0;
+		if (ret >= 0)
+			goto out_rollback;
+	} else
+		ret = 0;
+
 out:
 	close(fd);
 	free(data);
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index d295e406a756..baed891d0ba4 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -9,6 +9,7 @@ else
   TESTDIR=.
 fi
 BOOTCONF=${TESTDIR}/bootconfig
+ALIGN=4
 
 INITRD=`mktemp ${TESTDIR}/initrd-XXXX`
 TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf`
@@ -59,7 +60,10 @@ echo "Show command test"
 xpass $BOOTCONF $INITRD
 
 echo "File size check"
-xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
+total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1 )
+total_size=$(expr $total_size / $ALIGN)
+total_size=$(expr $total_size \* $ALIGN)
+xpass test $new_size -eq $total_size
 
 echo "Apply command repeat test"
 xpass $BOOTCONF -a $TEMPCONF $INITRD
-- 
cgit v1.2.3


From 450d060e8f752a6ce052a2bffd3f01633472e330 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 18 Nov 2020 23:30:39 -0800
Subject: bpftool: Add {i,d}tlb_misses support for bpftool profile

Commit 47c09d6a9f67("bpftool: Introduce "prog profile" command")
introduced "bpftool prog profile" command which can be used
to profile bpf program with metrics like # of instructions,

This patch added support for itlb_misses and dtlb_misses.
During an internal bpf program performance evaluation,
I found these two metrics are also very useful. The following
is an example output:

 $ bpftool prog profile id 324 duration 3 cycles itlb_misses

           1885029 run_cnt
        5134686073 cycles
            306893 itlb_misses

 $ bpftool prog profile id 324 duration 3 cycles dtlb_misses

           1827382 run_cnt
        4943593648 cycles
           5975636 dtlb_misses

 $ bpftool prog profile id 324 duration 3 cycles llc_misses

           1836527 run_cnt
        5019612972 cycles
           4161041 llc_misses

From the above, we can see quite some dtlb misses, 3 dtlb misses
perf prog run. This might be something worth further investigation.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201119073039.4060095-1-yhs@fb.com
---
 tools/bpf/bpftool/prog.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index acdb2c245f0a..1fe3ba255bad 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1717,6 +1717,34 @@ struct profile_metric {
 		.ratio_desc = "LLC misses per million insns",
 		.ratio_mul = 1e6,
 	},
+	{
+		.name = "itlb_misses",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_ITLB |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+			.exclude_user = 1
+		},
+		.ratio_metric = 2,
+		.ratio_desc = "itlb misses per million insns",
+		.ratio_mul = 1e6,
+	},
+	{
+		.name = "dtlb_misses",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_DTLB |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+			.exclude_user = 1
+		},
+		.ratio_metric = 2,
+		.ratio_desc = "dtlb misses per million insns",
+		.ratio_mul = 1e6,
+	},
 };
 
 static __u64 profile_total_count;
@@ -2109,7 +2137,7 @@ static int do_help(int argc, char **argv)
 		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
-		"       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+		"       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
-- 
cgit v1.2.3


From 192cf32243ce39af65bd095625aec374b38c03df Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 11 Oct 2020 10:47:45 -0500
Subject: selftests/seccomp: Compare bitmap vs filter overhead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of the seccomp benchmarking, include the expectations with
regard to the timing behavior of the constant action bitmaps, and report
inconsistencies better.

Example output with constant action bitmaps on x86:

$ sudo ./seccomp_benchmark 100000000
Current BPF sysctl settings:
net.core.bpf_jit_enable = 1
net.core.bpf_jit_harden = 0
Benchmarking 200000000 syscalls...
129.359381409 - 0.008724424 = 129350656985 (129.4s)
getpid native: 646 ns
264.385890006 - 129.360453229 = 135025436777 (135.0s)
getpid RET_ALLOW 1 filter (bitmap): 675 ns
399.400511893 - 264.387045901 = 135013465992 (135.0s)
getpid RET_ALLOW 2 filters (bitmap): 675 ns
545.872866260 - 399.401718327 = 146471147933 (146.5s)
getpid RET_ALLOW 3 filters (full): 732 ns
696.337101319 - 545.874097681 = 150463003638 (150.5s)
getpid RET_ALLOW 4 filters (full): 752 ns
Estimated total seccomp overhead for 1 bitmapped filter: 29 ns
Estimated total seccomp overhead for 2 bitmapped filters: 29 ns
Estimated total seccomp overhead for 3 full filters: 86 ns
Estimated total seccomp overhead for 4 full filters: 106 ns
Estimated seccomp entry overhead: 29 ns
Estimated seccomp per-filter overhead (last 2 diff): 20 ns
Estimated seccomp per-filter overhead (filters / 4): 19 ns
Expectations:
	native ≤ 1 bitmap (646 ≤ 675): ✔️
	native ≤ 1 filter (646 ≤ 732): ✔️
	per-filter (last 2 diff) ≈ per-filter (filters / 4) (20 ≈ 19): ✔️
	1 bitmapped ≈ 2 bitmapped (29 ≈ 29): ✔️
	entry ≈ 1 bitmapped (29 ≈ 29): ✔️
	entry ≈ 2 bitmapped (29 ≈ 29): ✔️
	native + entry + (per filter * 4) ≈ 4 filters total (755 ≈ 752): ✔️

[YiFei: Changed commit message to show stats for this patch series]
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/1b61df3db85c5f7f1b9202722c45e7b39df73ef2.1602431034.git.yifeifz2@illinois.edu
---
 .../testing/selftests/seccomp/seccomp_benchmark.c  | 151 ++++++++++++++++++---
 tools/testing/selftests/seccomp/settings           |   2 +-
 2 files changed, 130 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 91f5a89cadac..fcc806585266 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -4,12 +4,16 @@
  */
 #define _GNU_SOURCE
 #include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
 #include <linux/filter.h>
 #include <linux/seccomp.h>
+#include <sys/param.h>
 #include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -70,18 +74,74 @@ unsigned long long calibrate(void)
 	return samples * seconds;
 }
 
+bool approx(int i_one, int i_two)
+{
+	double one = i_one, one_bump = one * 0.01;
+	double two = i_two, two_bump = two * 0.01;
+
+	one_bump = one + MAX(one_bump, 2.0);
+	two_bump = two + MAX(two_bump, 2.0);
+
+	/* Equal to, or within 1% or 2 digits */
+	if (one == two ||
+	    (one > two && one <= two_bump) ||
+	    (two > one && two <= one_bump))
+		return true;
+	return false;
+}
+
+bool le(int i_one, int i_two)
+{
+	if (i_one <= i_two)
+		return true;
+	return false;
+}
+
+long compare(const char *name_one, const char *name_eval, const char *name_two,
+	     unsigned long long one, bool (*eval)(int, int), unsigned long long two)
+{
+	bool good;
+
+	printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
+	       (long long)one, name_eval, (long long)two);
+	if (one > INT_MAX) {
+		printf("Miscalculation! Measurement went negative: %lld\n", (long long)one);
+		return 1;
+	}
+	if (two > INT_MAX) {
+		printf("Miscalculation! Measurement went negative: %lld\n", (long long)two);
+		return 1;
+	}
+
+	good = eval(one, two);
+	printf("%s\n", good ? "✔️" : "❌");
+
+	return good ? 0 : 1;
+}
+
 int main(int argc, char *argv[])
 {
+	struct sock_filter bitmap_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog bitmap_prog = {
+		.len = (unsigned short)ARRAY_SIZE(bitmap_filter),
+		.filter = bitmap_filter,
+	};
 	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)ARRAY_SIZE(filter),
 		.filter = filter,
 	};
-	long ret;
-	unsigned long long samples;
-	unsigned long long native, filter1, filter2;
+
+	long ret, bits;
+	unsigned long long samples, calc;
+	unsigned long long native, filter1, filter2, bitmap1, bitmap2;
+	unsigned long long entry, per_filter1, per_filter2;
 
 	printf("Current BPF sysctl settings:\n");
 	system("sysctl net.core.bpf_jit_enable");
@@ -101,35 +161,82 @@ int main(int argc, char *argv[])
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	assert(ret == 0);
 
-	/* One filter */
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	/* One filter resulting in a bitmap */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
 	assert(ret == 0);
 
-	filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-	printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
+	bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
+
+	/* Second filter resulting in a bitmap */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+	assert(ret == 0);
 
-	if (filter1 == native)
-		printf("No overhead measured!? Try running again with more samples.\n");
+	bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
 
-	/* Two filters */
+	/* Third filter, can no longer be converted to bitmap */
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 	assert(ret == 0);
 
-	filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-	printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
-
-	/* Calculations */
-	printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
-		filter1 - native);
+	filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
 
-	printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
-		filter2 - native);
+	/* Fourth filter, can not be converted to bitmap because of filter 3 */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+	assert(ret == 0);
 
-	printf("Estimated seccomp per-filter overhead: %llu ns\n",
-		filter2 - filter1);
+	filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
+
+	/* Estimations */
+#define ESTIMATE(fmt, var, what)	do {			\
+		var = (what);					\
+		printf("Estimated " fmt ": %llu ns\n", var);	\
+		if (var > INT_MAX)				\
+			goto more_samples;			\
+	} while (0)
+
+	ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
+		 bitmap1 - native);
+	ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc,
+		 bitmap2 - native);
+	ESTIMATE("total seccomp overhead for 3 full filters", calc,
+		 filter1 - native);
+	ESTIMATE("total seccomp overhead for 4 full filters", calc,
+		 filter2 - native);
+	ESTIMATE("seccomp entry overhead", entry,
+		 bitmap1 - native - (bitmap2 - bitmap1));
+	ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1,
+		 filter2 - filter1);
+	ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
+		 (filter2 - native - entry) / 4);
+
+	printf("Expectations:\n");
+	ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1);
+	bits = compare("native", "≤", "1 filter", native, le, filter1);
+	if (bits)
+		goto more_samples;
+
+	ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)",
+			per_filter1, approx, per_filter2);
+
+	bits = compare("1 bitmapped", "≈", "2 bitmapped",
+			bitmap1 - native, approx, bitmap2 - native);
+	if (bits) {
+		printf("Skipping constant action bitmap expectations: they appear unsupported.\n");
+		goto out;
+	}
 
-	printf("Estimated seccomp entry overhead: %llu ns\n",
-		filter1 - native - (filter2 - filter1));
+	ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native);
+	ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native);
+	ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total",
+			entry + (per_filter1 * 4) + native, approx, filter2);
+	if (ret == 0)
+		goto out;
 
+more_samples:
+	printf("Saw unexpected benchmark result. Try running again with more samples?\n");
+out:
 	return 0;
 }
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
index ba4d85f74cd6..6091b45d226b 100644
--- a/tools/testing/selftests/seccomp/settings
+++ b/tools/testing/selftests/seccomp/settings
@@ -1 +1 @@
-timeout=90
+timeout=120
-- 
cgit v1.2.3


From fbb8531e58bd989868db3c2513d06870c46bd87f Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:20 +0100
Subject: selftests: extract common functions in ethtool-common.sh

Factor out some useful functions so that they can be reused
by other ethtool-netdevsim scripts.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-common.sh        | 69 ++++++++++++++++++++++
 .../drivers/net/netdevsim/ethtool-pause.sh         | 63 +-------------------
 2 files changed, 71 insertions(+), 61 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
new file mode 100644
index 000000000000..fa44cf6e732c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function get_netdev_name {
+    local -n old=$1
+
+    new=$(ls /sys/class/net)
+
+    for netdev in $new; do
+	for check in $old; do
+            [ $netdev == $check ] && break
+	done
+
+	if [ $netdev != $check ]; then
+	    echo $netdev
+	    break
+	fi
+    done
+}
+
+function check {
+    local code=$1
+    local str=$2
+    local exp_str=$3
+
+    if [ $code -ne 0 ]; then
+	((num_errors++))
+	return
+    fi
+
+    if [ "$str" != "$exp_str"  ]; then
+	echo -e "Expected: '$exp_str', got '$str'"
+	((num_errors++))
+	return
+    fi
+
+    ((num_passes++))
+}
+
+function make_netdev {
+    # Make a netdevsim
+    old_netdevs=$(ls /sys/class/net)
+
+    if ! $(lsmod | grep -q netdevsim); then
+	modprobe netdevsim
+    fi
+
+    echo $NSIM_ID > /sys/bus/netdevsim/new_device
+    echo `get_netdev_name old_netdevs`
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
index 25c896b9e2eb..b4a7abfe5454 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -1,60 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0-only
 
-NSIM_ID=$((RANDOM % 1024))
-NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
-NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
-NSIM_NETDEV=
-num_passes=0
-num_errors=0
-
-function cleanup_nsim {
-    if [ -e $NSIM_DEV_SYS ]; then
-	echo $NSIM_ID > /sys/bus/netdevsim/del_device
-    fi
-}
-
-function cleanup {
-    cleanup_nsim
-}
-
-trap cleanup EXIT
-
-function get_netdev_name {
-    local -n old=$1
-
-    new=$(ls /sys/class/net)
-
-    for netdev in $new; do
-	for check in $old; do
-            [ $netdev == $check ] && break
-	done
-
-	if [ $netdev != $check ]; then
-	    echo $netdev
-	    break
-	fi
-    done
-}
-
-function check {
-    local code=$1
-    local str=$2
-    local exp_str=$3
-
-    if [ $code -ne 0 ]; then
-	((num_errors++))
-	return
-    fi
-
-    if [ "$str" != "$exp_str"  ]; then
-	echo -e "Expected: '$exp_str', got '$str'"
-	((num_errors++))
-	return
-    fi
-
-    ((num_passes++))
-}
+source ethtool-common.sh
 
 # Bail if ethtool is too old
 if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
@@ -62,13 +9,7 @@ if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
     exit 4
 fi
 
-# Make a netdevsim
-old_netdevs=$(ls /sys/class/net)
-
-modprobe netdevsim
-echo $NSIM_ID > /sys/bus/netdevsim/new_device
-
-NSIM_NETDEV=`get_netdev_name old_netdevs`
+NSIM_NETDEV=$(make_netdev)
 
 set -o pipefail
 
-- 
cgit v1.2.3


From 9e48ee80ac4e04c9985379d58248dd2a96a170ef Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:21 +0100
Subject: selftests: refactor get_netdev_name function

As pointed out by Michal Kubecek, getting the name
with the previous approach was racy, it's better
and easier to get the name of the device with this
patch's approach.

Essentialy the function doesn't need to exist
anymore as it's a simple 'ls' command.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-common.sh          | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index fa44cf6e732c..9f64d5c7107b 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -20,23 +20,6 @@ function cleanup {
 
 trap cleanup EXIT
 
-function get_netdev_name {
-    local -n old=$1
-
-    new=$(ls /sys/class/net)
-
-    for netdev in $new; do
-	for check in $old; do
-            [ $netdev == $check ] && break
-	done
-
-	if [ $netdev != $check ]; then
-	    echo $netdev
-	    break
-	fi
-    done
-}
-
 function check {
     local code=$1
     local str=$2
@@ -65,5 +48,6 @@ function make_netdev {
     fi
 
     echo $NSIM_ID > /sys/bus/netdevsim/new_device
-    echo `get_netdev_name old_netdevs`
+    # get new device name
+    ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
 }
-- 
cgit v1.2.3


From fbb7a1f8137df4a693ea2b44096ad8ec518e3db1 Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:22 +0100
Subject: selftests: add ring and coalesce selftests

Add scripts to test ring and coalesce settings
of netdevsim.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-coalesce.sh      | 132 +++++++++++++++++++++
 .../drivers/net/netdevsim/ethtool-ring.sh          |  85 +++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
new file mode 100755
index 000000000000..9adfba8f87e6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+    local query="${SETTINGS_MAP[$1]}"
+
+    echo $(ethtool -c $NSIM_NETDEV | \
+        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+    for key in ${!SETTINGS_MAP[@]}; do
+        CURRENT_SETTINGS[$key]=$(get_value $key)
+    done
+    echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q coalesce; then
+    echo "SKIP: No --coalesce support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+    ["rx-frames-low"]="rx-frame-low"
+    ["tx-frames-low"]="tx-frame-low"
+    ["rx-frames-high"]="rx-frame-high"
+    ["tx-frames-high"]="tx-frame-high"
+    ["rx-usecs"]="rx-usecs"
+    ["rx-frames"]="rx-frames"
+    ["rx-usecs-irq"]="rx-usecs-irq"
+    ["rx-frames-irq"]="rx-frames-irq"
+    ["tx-usecs"]="tx-usecs"
+    ["tx-frames"]="tx-frames"
+    ["tx-usecs-irq"]="tx-usecs-irq"
+    ["tx-frames-irq"]="tx-frames-irq"
+    ["stats-block-usecs"]="stats-block-usecs"
+    ["pkt-rate-low"]="pkt-rate-low"
+    ["rx-usecs-low"]="rx-usecs-low"
+    ["tx-usecs-low"]="tx-usecs-low"
+    ["pkt-rate-high"]="pkt-rate-high"
+    ["rx-usecs-high"]="rx-usecs-high"
+    ["tx-usecs-high"]="tx-usecs-high"
+    ["sample-interval"]="sample-interval"
+)
+
+declare -A CURRENT_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+declare -A EXPECTED_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+    EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+    value=$((RANDOM % $((2**32-1))))
+
+    ethtool -C $NSIM_NETDEV "$key" "$value"
+
+    EXPECTED_SETTINGS[$key]="$value"
+    expected=${EXPECTED_SETTINGS[@]}
+    current=$(update_current_settings)
+
+    check $? "$current" "$expected"
+    set +x
+done
+
+# bool settings which ethtool displays on the same line
+ethtool -C $NSIM_NETDEV adaptive-rx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: off")
+check $? "$s" ""
+
+ethtool -C $NSIM_NETDEV adaptive-tx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: on")
+check $? "$s" ""
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
new file mode 100755
index 000000000000..c969559ffa7a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+    local query="${SETTINGS_MAP[$1]}"
+
+    echo $(ethtool -g $NSIM_NETDEV | \
+        tail -n +$CURR_SETT_LINE | \
+        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+    for key in ${!SETTINGS_MAP[@]}; do
+        CURRENT_SETTINGS[$key]=$(get_value $key)
+    done
+    echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q set-ring >/dev/null; then
+    echo "SKIP: No --set-ring support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+    ["rx"]="RX"
+    ["rx-mini"]="RX Mini"
+    ["rx-jumbo"]="RX Jumbo"
+    ["tx"]="TX"
+)
+
+declare -A EXPECTED_SETTINGS=(
+    ["rx"]=""
+    ["rx-mini"]=""
+    ["rx-jumbo"]=""
+    ["tx"]=""
+)
+
+declare -A CURRENT_SETTINGS=(
+    ["rx"]=""
+    ["rx-mini"]=""
+    ["rx-jumbo"]=""
+    ["tx"]=""
+)
+
+MAX_VALUE=$((RANDOM % $((2**32-1))))
+RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
+
+for ring_max_entry in $RING_MAX_LIST; do
+    echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
+done
+
+CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+    EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+    value=$((RANDOM % $MAX_VALUE))
+
+    ethtool -G $NSIM_NETDEV "$key" "$value"
+
+    EXPECTED_SETTINGS[$key]="$value"
+    expected=${EXPECTED_SETTINGS[@]}
+    current=$(update_current_settings)
+
+    check $? "$current" "$expected"
+    set +x
+done
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
-- 
cgit v1.2.3


From 20ac8f8690535161d9357f5b4af4dfdf88c56578 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:44 +0200
Subject: selftests: mlxsw: Add nexthop objects configuration tests

Test that unsupported nexthop objects are rejected and that offload
indication is correctly set on: nexthop objects, nexthop group objects
and routes associated these objects.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 189 +++++++++++++++++++++
 1 file changed, 189 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index f4031002d5e9..5de47d72f8c9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -29,6 +29,10 @@ ALL_TESTS="
 	bridge_extern_learn_test
 	neigh_offload_test
 	nexthop_offload_test
+	nexthop_obj_invalid_test
+	nexthop_obj_offload_test
+	nexthop_obj_group_offload_test
+	nexthop_obj_route_offload_test
 	devlink_reload_test
 "
 NUM_NETIFS=2
@@ -674,6 +678,191 @@ nexthop_offload_test()
 	sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down
 }
 
+nexthop_obj_invalid_test()
+{
+	# Test that invalid nexthop object configurations are rejected
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.3 fdb
+	check_fail $? "managed to configure an FDB nexthop when should not"
+
+	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
+	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
+
+	ip nexthop add id 1 blackhole
+	check_fail $? "managed to configure a blackhole nexthop when should not"
+
+	ip nexthop add id 1 dev $swp1
+	ip nexthop add id 2 dev $swp1
+	ip nexthop add id 10 group 1/2
+	check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+
+	log_test "nexthop objects - invalid configurations"
+
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_offload_test()
+{
+	# Test offload indication of nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded when should"
+
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after setting neigh to failed state"
+
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after neigh replace"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after replacing to use an invalid address"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after replacing to use a valid address"
+
+	log_test "nexthop objects offload indication"
+
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_group_offload_test()
+{
+	# Test offload indication of nexthop group objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "IPv4 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 2
+	check_err $? "IPv6 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_fail $? "nexthop group not marked as offloaded with one valid nexthop"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group marked as offloaded when should not"
+
+	# Revalidate nexthop id 1
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded after revalidating nexthop"
+
+	log_test "nexthop group objects offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_route_offload_test()
+{
+	# Test offload indication of routes using nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	ip route replace 198.51.100.0/24 nhid 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded when using valid nexthop"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one"
+
+	ip nexthop replace id 1 via 192.0.2.4 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one"
+
+	log_test "routes using nexthop objects offload indication"
+
+	ip route del 198.51.100.0/24
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
 devlink_reload_test()
 {
 	# Test that after executing all the above configuration tests, a
-- 
cgit v1.2.3


From ffb721515bf3352f38457fd2ab19f575e75e190e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:45 +0200
Subject: selftests: forwarding: Do not configure nexthop objects twice

routing_nh_obj() is used to configure the nexthop objects employed by
the test, but it is called twice resulting in "RTNETLINK answers: File
exists" messages.

Remove the first call, so that the function is only called after
setup_wait(), when all the interfaces are up and ready.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index cf3d26c233e8..6067477ff326 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -312,7 +312,6 @@ setup_prepare()
 
 	router1_create
 	router2_create
-	routing_nh_obj
 
 	forwarding_enable
 }
-- 
cgit v1.2.3


From 3600f29ad1399a1335af2030e8106ac8bbe9261a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:46 +0200
Subject: selftests: forwarding: Test IPv4 routes with IPv6 link-local nexthops

In addition to IPv4 multipath tests with IPv4 nexthops, also test IPv4
multipath with nexthops that use IPv6 link-local addresses.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 6067477ff326..e8c2573d5232 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -280,6 +280,17 @@ multipath_test()
 	multipath4_test "Weighted MP 2:1" 2 1
 	multipath4_test "Weighted MP 11:45" 11 45
 
+	log_info "Running IPv4 multipath tests with IPv6 link-local nexthops"
+	ip nexthop replace id 101 via fe80:2::22 dev $rp12
+	ip nexthop replace id 102 via fe80:3::23 dev $rp13
+
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 102 via 169.254.3.23 dev $rp13
+	ip nexthop replace id 101 via 169.254.2.22 dev $rp12
+
 	log_info "Running IPv6 multipath tests"
 	multipath6_test "ECMP" 1 1
 	multipath6_test "Weighted MP 2:1" 2 1
-- 
cgit v1.2.3


From e96fa54bbd90e487a8c230155db4231d9326ebcc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:47 +0200
Subject: selftests: forwarding: Add device-only nexthop test

In a similar fashion to router_multipath.sh and its nexthop objects
version router_mpath_nh.sh, create a nexthop objects version of
router.sh.

It reuses the same topology, but uses device-only nexthop objects
instead of legacy ones.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/router_nh.sh  | 160 +++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_nh.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
new file mode 100755
index 000000000000..f3a53738bdcc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp2 clsact
+
+	ip address add 192.0.2.1/24 dev $rp1
+	ip address add 2001:db8:1::1/64 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 2001:db8:1::1/64 dev $rp1
+	ip address del 192.0.2.1/24 dev $rp1
+
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+routing_nh_obj()
+{
+	# Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could
+	# use them.
+	ip -6 nexthop add id 101 dev $rp1
+	ip -6 nexthop add id 102 dev $rp2
+
+	ip route replace 192.0.2.0/24 nhid 101
+	ip route replace 2001:db8:1::/64 nhid 101
+	ip route replace 198.51.100.0/24 nhid 102
+	ip route replace 2001:db8:2::/64 nhid 102
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From e035146d65603165078629671afa9409f659a358 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:48 +0200
Subject: selftests: forwarding: Add multipath tunneling nexthop test

Add a nexthop objects version of gre_multipath.sh. Unlike the original
test, it also tests IPv6 overlay which is not possible with the legacy
nexthop implementation. See commit 9a2ad3623868 ("selftests: forwarding:
gre_multipath: Drop IPv6 tests") for more info.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/gre_multipath_nh.sh   | 356 +++++++++++++++++++++
 1 file changed, 356 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/gre_multipath_nh.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
new file mode 100755
index 000000000000..d03aa2cab9fd
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -0,0 +1,356 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+	multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 0
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for ((i=0; i < 16384; ++i)); do
+		ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+	done
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 8b819a84d4b12c4a91cc9f91ad69ca09c3e0606d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 19 Nov 2020 11:45:57 -0800
Subject: selftests: mptcp: add link failure test case

Add a test case where a link fails with multiple subflows.
The expectation is that MPTCP will transmit any data that
could not be delivered via the failed link on another subflow.

Co-developed-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 104 +++++++++++++++++++-----
 1 file changed, 82 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 0d93b243695f..f841ed8186c1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -5,6 +5,7 @@ ret=0
 sin=""
 sout=""
 cin=""
+cinsent=""
 cout=""
 ksft_skip=4
 timeout=30
@@ -81,7 +82,7 @@ cleanup_partial()
 cleanup()
 {
 	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$sin" "$sout" "$cinsent"
 	cleanup_partial
 }
 
@@ -144,6 +145,13 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+print_file_err()
+{
+	ls -l "$1" 1>&2
+	echo "Trailing bytes are: "
+	tail -c 27 "$1"
+}
+
 check_transfer()
 {
 	in=$1
@@ -155,6 +163,7 @@ check_transfer()
 		echo "[ FAIL ] $what does not match (in, out):"
 		print_file_err "$in"
 		print_file_err "$out"
+		ret=1
 
 		return 1
 	fi
@@ -175,6 +184,17 @@ do_ping()
 	fi
 }
 
+link_failure()
+{
+	ns="$1"
+
+	l=$((RANDOM%4))
+	l=$((l+1))
+
+	veth="ns1eth$l"
+	ip -net "$ns" link set "$veth" down
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -182,9 +202,10 @@ do_transfer()
 	cl_proto="$3"
 	srv_proto="$4"
 	connect_addr="$5"
-	rm_nr_ns1="$6"
-	rm_nr_ns2="$7"
-	speed="$8"
+	test_link_fail="$6"
+	rm_nr_ns1="$7"
+	rm_nr_ns2="$8"
+	speed="$9"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -220,7 +241,12 @@ do_transfer()
 
 	sleep 1
 
-	ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	if [ "$test_link_fail" -eq 0 ];then
+		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	else
+		( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \
+		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" &
+	fi
 	cpid=$!
 
 	if [ $rm_nr_ns1 -gt 0 ]; then
@@ -265,12 +291,17 @@ do_transfer()
 		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
 
 		cat "$capout"
+		ret=1
 		return 1
 	fi
 
 	check_transfer $sin $cout "file received by client"
 	retc=$?
-	check_transfer $cin $sout "file received by server"
+	if [ "$test_link_fail" -eq 0 ];then
+		check_transfer $cin $sout "file received by server"
+	else
+		check_transfer $cinsent $sout "file received by server"
+	fi
 	rets=$?
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
@@ -286,13 +317,12 @@ make_file()
 {
 	name=$1
 	who=$2
+	size=$3
 
-	SIZE=1
-
-	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
 	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
 
-	echo "Created $name (size $SIZE KB) containing data sent by $who"
+	echo "Created $name (size $size KB) containing data sent by $who"
 }
 
 run_tests()
@@ -300,14 +330,32 @@ run_tests()
 	listener_ns="$1"
 	connector_ns="$2"
 	connect_addr="$3"
-	rm_nr_ns1="${4:-0}"
-	rm_nr_ns2="${5:-0}"
-	speed="${6:-fast}"
+	test_linkfail="${4:-0}"
+	rm_nr_ns1="${5:-0}"
+	rm_nr_ns2="${6:-0}"
+	speed="${7:-fast}"
 	lret=0
+	oldin=""
+
+	if [ "$test_linkfail" -eq 1 ];then
+		size=$((RANDOM%1024))
+		size=$((size+1))
+		size=$((size*128))
+
+		oldin=$(mktemp)
+		cp "$cin" "$oldin"
+		make_file "$cin" "client" $size
+	fi
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-		${rm_nr_ns1} ${rm_nr_ns2} ${speed}
+		${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed}
 	lret=$?
+
+	if [ "$test_linkfail" -eq 1 ];then
+		cp "$oldin" "$cin"
+		rm -f "$oldin"
+	fi
+
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		return
@@ -440,10 +488,11 @@ chk_rm_nr()
 sin=$(mktemp)
 sout=$(mktemp)
 cin=$(mktemp)
+cinsent=$(mktemp)
 cout=$(mktemp)
 init
-make_file "$cin" "client"
-make_file "$sin" "server"
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
 trap cleanup EXIT
 
 run_tests $ns1 $ns2 10.0.1.1
@@ -528,12 +577,23 @@ run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
 chk_add_nr 1 1
 
+# accept and use add_addr with additional subflows and link loss
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1 1
+chk_join_nr "multiple flows, signal, link failure" 3 3 3
+chk_add_nr 1 1
+
 # add_addr timeout
 reset_with_add_addr_timeout
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1 0 0 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
 chk_add_nr 4 0
 
@@ -542,7 +602,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 0 1 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
 chk_join_nr "remove single subflow" 1 1 1
 chk_rm_nr 1 1
 
@@ -552,7 +612,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 0 2 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
 chk_join_nr "remove multiple subflows" 2 2 2
 chk_rm_nr 2 2
 
@@ -561,7 +621,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_tests $ns1 $ns2 10.0.1.1 1 0 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
 chk_join_nr "remove single address" 1 1 1
 chk_add_nr 1 1
 chk_rm_nr 0 0
@@ -572,7 +632,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 1 1 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow
 chk_join_nr "remove subflow and signal" 2 2 2
 chk_add_nr 1 1
 chk_rm_nr 1 1
@@ -584,7 +644,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 3
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 1 2 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow
 chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
-- 
cgit v1.2.3


From 523514ed0a998fda389b9b6f00d0f2054ba30d25 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 19 Nov 2020 11:46:01 -0800
Subject: selftests: mptcp: add ADD_ADDR IPv6 test cases

This patch added IPv6 support for do_transfer, and the test cases for
ADD_ADDR IPv6.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 70 ++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f841ed8186c1..0eae628d1ffd 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -195,6 +195,12 @@ link_failure()
 	ip -net "$ns" link set "$veth" down
 }
 
+# $1: IP address
+is_v6()
+{
+	[ -z "${1##*:*}" ]
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -236,7 +242,15 @@ do_transfer()
 		mptcp_connect="./mptcp_connect -r"
 	fi
 
-	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+	local local_addr
+	if is_v6 "${connect_addr}"; then
+		local_addr="::"
+	else
+		local_addr="0.0.0.0"
+	fi
+
+	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \
+		-s ${srv_proto} ${local_addr} < "$sin" > "$sout" &
 	spid=$!
 
 	sleep 1
@@ -649,6 +663,60 @@ chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
 
+# subflow IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "single subflow IPv6" 1 1 1
+
+# add_address, unused IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "unused signal address IPv6" 0 0 0
+chk_add_nr 1 1
+
+# signal address IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "single address IPv6" 1 1 1
+chk_add_nr 1 1
+
+# add_addr timeout IPv6
+reset_with_add_addr_timeout 6
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
+chk_add_nr 4 0
+
+# single address IPv6, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow
+chk_join_nr "remove single address IPv6" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal IPv6, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow
+chk_join_nr "remove subflow and signal IPv6" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
 # single subflow, syncookies
 reset_with_cookies
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-- 
cgit v1.2.3


From d23e95c09067618eabd6d0e8cff372f0ce517c84 Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Tue, 10 Nov 2020 18:36:17 -0800
Subject: pm-graph v5.8

 - if wakeups occur in s2idle: "freeze time: N (-x ms waking y times) ms"

 - change FREEZELOOP and FREEZEWAKE to S2LOOP and S2WAKE for brevity

 - returns all sysfs vals to their initial state after testing

 - use the dmesg log for debugging until the test is completed,
   instrument the executeSuspend process to have a full trace,
   if test completes, formal dmesg log overwrites the debug log

 - fix CPU_ON and CPU_OFF devices in the timeline, should include [n]

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/README        |   4 +-
 tools/power/pm-graph/sleepgraph.py | 387 +++++++++++++++++++++----------------
 2 files changed, 227 insertions(+), 164 deletions(-)

(limited to 'tools')

diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index 89d0a7dab4bc..da468bd510ca 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -6,7 +6,7 @@
    |_|                    |___/          |_|
 
    pm-graph: suspend/resume/boot timing analysis tools
-    Version: 5.7
+    Version: 5.8
      Author: Todd Brandt <todd.e.brandt@intel.com>
   Home Page: https://01.org/pm-graph
 
@@ -61,7 +61,7 @@
        - runs with python2 or python3, choice is made by /usr/bin/python link
        - python
        - python-configparser (for python2 sleepgraph)
-       - python-requests (for googlesheet.py)
+       - python-requests (for stresstester.py)
        - linux-tools-common (for turbostat usage in sleepgraph)
 
        Ubuntu:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 1bc36a1db14f..81f4b8abbdf7 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -81,7 +81,7 @@ def ascii(text):
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.7'
+	version = '5.8'
 	ansi = False
 	rs = 0
 	display = ''
@@ -92,8 +92,9 @@ class SystemValues:
 	testlog = True
 	dmesglog = True
 	ftracelog = False
+	acpidebug = True
 	tstat = True
-	mindevlen = 0.0
+	mindevlen = 0.0001
 	mincglen = 0.0
 	cgphase = ''
 	cgtest = -1
@@ -115,6 +116,7 @@ class SystemValues:
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
 	pmdpath = '/sys/power/pm_debug_messages'
+	acpipath='/sys/module/acpi/parameters/debug_level'
 	traceevents = [
 		'suspend_resume',
 		'wakeup_source_activate',
@@ -162,16 +164,16 @@ class SystemValues:
 	devdump = False
 	mixedphaseheight = True
 	devprops = dict()
+	cfgdef = dict()
 	platinfo = []
 	predelay = 0
 	postdelay = 0
-	pmdebug = ''
 	tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f'
 	tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f'
 	tracefuncs = {
 		'sys_sync': {},
 		'ksys_sync': {},
-		'pm_notifier_call_chain_robust': {},
+		'__pm_notifier_call_chain': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
 		'freeze_processes': {},
@@ -490,9 +492,9 @@ class SystemValues:
 		call('echo 0 > %s/wakealarm' % self.rtcpath, shell=True)
 	def initdmesg(self):
 		# get the latest time stamp from the dmesg log
-		fp = Popen('dmesg', stdout=PIPE).stdout
+		lines = Popen('dmesg', stdout=PIPE).stdout.readlines()
 		ktime = '0'
-		for line in fp:
+		for line in reversed(lines):
 			line = ascii(line).replace('\r\n', '')
 			idx = line.find('[')
 			if idx > 1:
@@ -500,7 +502,7 @@ class SystemValues:
 			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
 			if(m):
 				ktime = m.group('ktime')
-		fp.close()
+				break
 		self.dmesgstart = float(ktime)
 	def getdmesg(self, testdata):
 		op = self.writeDatafileHeader(self.dmesgfile, testdata)
@@ -715,8 +717,6 @@ class SystemValues:
 			self.fsetVal('0', 'events/kprobes/enable')
 			self.fsetVal('', 'kprobe_events')
 			self.fsetVal('1024', 'buffer_size_kb')
-		if self.pmdebug:
-			self.setVal(self.pmdebug, self.pmdpath)
 	def setupAllKprobes(self):
 		for name in self.tracefuncs:
 			self.defaultKprobe(name, self.tracefuncs[name])
@@ -740,11 +740,7 @@ class SystemValues:
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
-		# pm debug messages
-		pv = self.getVal(self.pmdpath)
-		if pv != '1':
-			self.setVal('1', self.pmdpath)
-			self.pmdebug = pv
+		self.testVal(self.pmdpath, 'basic', '1')
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
 		self.fsetVal('nop', 'current_tracer')
@@ -900,6 +896,14 @@ class SystemValues:
 		if isgz:
 			return gzip.open(filename, mode+'t')
 		return open(filename, mode)
+	def putlog(self, filename, text):
+		with self.openlog(filename, 'a') as fp:
+			fp.write(text)
+			fp.close()
+	def dlog(self, text):
+		self.putlog(self.dmesgfile, '# %s\n' % text)
+	def flog(self, text):
+		self.putlog(self.ftracefile, text)
 	def b64unzip(self, data):
 		try:
 			out = codecs.decode(base64.b64decode(data), 'zlib').decode()
@@ -992,9 +996,7 @@ class SystemValues:
 		# add a line for each of these commands with their outputs
 		for name, cmdline, info in cmdafter:
 			footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info))
-
-		with self.openlog(self.ftracefile, 'a') as fp:
-			fp.write(footer)
+		self.flog(footer)
 		return True
 	def commonPrefix(self, list):
 		if len(list) < 2:
@@ -1034,6 +1036,7 @@ class SystemValues:
 			cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2])
 			if not cmdpath or (begin and not delta):
 				continue
+			self.dlog('[%s]' % cmdline)
 			try:
 				fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout
 				info = ascii(fp.read()).strip()
@@ -1060,6 +1063,29 @@ class SystemValues:
 			else:
 				out.append((name, cmdline, '\tnothing' if not info else info))
 		return out
+	def testVal(self, file, fmt='basic', value=''):
+		if file == 'restoreall':
+			for f in self.cfgdef:
+				if os.path.exists(f):
+					fp = open(f, 'w')
+					fp.write(self.cfgdef[f])
+					fp.close()
+			self.cfgdef = dict()
+		elif value and os.path.exists(file):
+			fp = open(file, 'r+')
+			if fmt == 'radio':
+				m = re.match('.*\[(?P<v>.*)\].*', fp.read())
+				if m:
+					self.cfgdef[file] = m.group('v')
+			elif fmt == 'acpi':
+				line = fp.read().strip().split('\n')[-1]
+				m = re.match('.* (?P<v>[0-9A-Fx]*) .*', line)
+				if m:
+					self.cfgdef[file] = m.group('v')
+			else:
+				self.cfgdef[file] = fp.read().strip()
+			fp.write(value)
+			fp.close()
 	def haveTurbostat(self):
 		if not self.tstat:
 			return False
@@ -1201,6 +1227,57 @@ class SystemValues:
 			self.multitest[sz] *= 1440
 		elif unit == 'h':
 			self.multitest[sz] *= 60
+	def displayControl(self, cmd):
+		xset, ret = 'timeout 10 xset -d :0.0 {0}', 0
+		if self.sudouser:
+			xset = 'sudo -u %s %s' % (self.sudouser, xset)
+		if cmd == 'init':
+			ret = call(xset.format('dpms 0 0 0'), shell=True)
+			if not ret:
+				ret = call(xset.format('s off'), shell=True)
+		elif cmd == 'reset':
+			ret = call(xset.format('s reset'), shell=True)
+		elif cmd in ['on', 'off', 'standby', 'suspend']:
+			b4 = self.displayControl('stat')
+			ret = call(xset.format('dpms force %s' % cmd), shell=True)
+			if not ret:
+				curr = self.displayControl('stat')
+				self.vprint('Display Switched: %s -> %s' % (b4, curr))
+				if curr != cmd:
+					self.vprint('WARNING: Display failed to change to %s' % cmd)
+			if ret:
+				self.vprint('WARNING: Display failed to change to %s with xset' % cmd)
+				return ret
+		elif cmd == 'stat':
+			fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
+			ret = 'unknown'
+			for line in fp:
+				m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line))
+				if(m and len(m.group('m')) >= 2):
+					out = m.group('m').lower()
+					ret = out[3:] if out[0:2] == 'in' else out
+					break
+			fp.close()
+		return ret
+	def setRuntimeSuspend(self, before=True):
+		if before:
+			# runtime suspend disable or enable
+			if self.rs > 0:
+				self.rstgt, self.rsval, self.rsdir = 'on', 'auto', 'enabled'
+			else:
+				self.rstgt, self.rsval, self.rsdir = 'auto', 'on', 'disabled'
+			pprint('CONFIGURING RUNTIME SUSPEND...')
+			self.rslist = deviceInfo(self.rstgt)
+			for i in self.rslist:
+				self.setVal(self.rsval, i)
+			pprint('runtime suspend %s on all devices (%d changed)' % (self.rsdir, len(self.rslist)))
+			pprint('waiting 5 seconds...')
+			time.sleep(5)
+		else:
+			# runtime suspend re-enable or re-disable
+			for i in self.rslist:
+				self.setVal(self.rstgt, i)
+			pprint('runtime suspend settings restored on %d devices' % len(self.rslist))
 
 sysvals = SystemValues()
 switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
@@ -1640,15 +1717,20 @@ class Data:
 			if 'resume_machine' in phase and 'suspend_machine' in lp:
 				tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start']
 				tL = tR - tS
-				if tL > 0:
-					left = True if tR > tZero else False
-					self.trimTime(tS, tL, left)
-					if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001:
-						tTry = round(self.dmesg[lp]['trying'] * 1000)
-						text = '%.0f (-%.0f waking)' % (tL * 1000, tTry)
+				if tL <= 0:
+					continue
+				left = True if tR > tZero else False
+				self.trimTime(tS, tL, left)
+				if 'waking' in self.dmesg[lp]:
+					tCnt = self.dmesg[lp]['waking'][0]
+					if self.dmesg[lp]['waking'][1] >= 0.001:
+						tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
 					else:
-						text = '%.0f' % (tL * 1000)
-					self.tLow.append(text)
+						tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
+					text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt)
+				else:
+					text = '%.0f' % (tL * 1000)
+				self.tLow.append(text)
 			lp = phase
 	def getMemTime(self):
 		if not self.hwstart or not self.hwend:
@@ -1921,7 +2003,7 @@ class Data:
 			for dev in list:
 				length = (list[dev]['end'] - list[dev]['start']) * 1000
 				width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal)
-				if width != '0.000000' and length >= mindevlen:
+				if length >= mindevlen:
 					devlist.append(dev)
 			self.tdevlist[phase] = devlist
 	def addHorizontalDivider(self, devname, devend):
@@ -3316,9 +3398,10 @@ def parseTraceLog(live=False):
 							# trim out s2idle loops, track time trying to freeze
 							llp = data.lastPhase(2)
 							if llp.startswith('suspend_machine'):
-								if 'trying' not in data.dmesg[llp]:
-									data.dmesg[llp]['trying'] = 0
-								data.dmesg[llp]['trying'] += \
+								if 'waking' not in data.dmesg[llp]:
+									data.dmesg[llp]['waking'] = [0, 0.0]
+								data.dmesg[llp]['waking'][0] += 1
+								data.dmesg[llp]['waking'][1] += \
 									t.time - data.dmesg[lp]['start']
 							data.currphase = ''
 							del data.dmesg[lp]
@@ -4555,7 +4638,7 @@ def createHTML(testruns, testfail):
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
 				for d in sorted(data.tdevlist[b]):
-					dname = d if '[' not in d else d.split('[')[0]
+					dname = d if ('[' not in d or 'CPU' in d) else d.split('[')[0]
 					name, dev = dname, phaselist[d]
 					drv = xtraclass = xtrainfo = xtrastyle = ''
 					if 'htmlclass' in dev:
@@ -5194,156 +5277,146 @@ def addScriptCode(hf, testruns):
 	'</script>\n'
 	hf.write(script_code);
 
-def setRuntimeSuspend(before=True):
-	global sysvals
-	sv = sysvals
-	if sv.rs == 0:
-		return
-	if before:
-		# runtime suspend disable or enable
-		if sv.rs > 0:
-			sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
-		else:
-			sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
-		pprint('CONFIGURING RUNTIME SUSPEND...')
-		sv.rslist = deviceInfo(sv.rstgt)
-		for i in sv.rslist:
-			sv.setVal(sv.rsval, i)
-		pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
-		pprint('waiting 5 seconds...')
-		time.sleep(5)
-	else:
-		# runtime suspend re-enable or re-disable
-		for i in sv.rslist:
-			sv.setVal(sv.rstgt, i)
-		pprint('runtime suspend settings restored on %d devices' % len(sv.rslist))
-
 # Function: executeSuspend
 # Description:
 #	 Execute system suspend through the sysfs interface, then copy the output
 #	 dmesg and ftrace files to the test output directory.
 def executeSuspend(quiet=False):
-	pm = ProcessMonitor()
-	tp = sysvals.tpath
-	if sysvals.wifi:
-		wifi = sysvals.checkWifi()
+	sv, tp, pm = sysvals, sysvals.tpath, ProcessMonitor()
+	if sv.wifi:
+		wifi = sv.checkWifi()
+		sv.dlog('wifi check, connected device is "%s"' % wifi)
 	testdata = []
 	# run these commands to prepare the system for suspend
-	if sysvals.display:
+	if sv.display:
 		if not quiet:
-			pprint('SET DISPLAY TO %s' % sysvals.display.upper())
-		displayControl(sysvals.display)
+			pprint('SET DISPLAY TO %s' % sv.display.upper())
+		ret = sv.displayControl(sv.display)
+		sv.dlog('xset display %s, ret = %d' % (sv.display, ret))
 		time.sleep(1)
-	if sysvals.sync:
+	if sv.sync:
 		if not quiet:
 			pprint('SYNCING FILESYSTEMS')
+		sv.dlog('syncing filesystems')
 		call('sync', shell=True)
-	# mark the start point in the kernel ring buffer just as we start
-	sysvals.initdmesg()
+	sv.dlog('read dmesg')
+	sv.initdmesg()
 	# start ftrace
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
+	if(sv.usecallgraph or sv.usetraceevents):
 		if not quiet:
 			pprint('START TRACING')
-		sysvals.fsetVal('1', 'tracing_on')
-		if sysvals.useprocmon:
+		sv.dlog('start ftrace tracing')
+		sv.fsetVal('1', 'tracing_on')
+		if sv.useprocmon:
+			sv.dlog('start the process monitor')
 			pm.start()
-	sysvals.cmdinfo(True)
+	sv.dlog('run the cmdinfo list before')
+	sv.cmdinfo(True)
 	# execute however many s/r runs requested
-	for count in range(1,sysvals.execcount+1):
+	for count in range(1,sv.execcount+1):
 		# x2delay in between test runs
-		if(count > 1 and sysvals.x2delay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.x2delay, 'trace_marker')
-			time.sleep(sysvals.x2delay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count > 1 and sv.x2delay > 0):
+			sv.fsetVal('WAIT %d' % sv.x2delay, 'trace_marker')
+			time.sleep(sv.x2delay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# start message
-		if sysvals.testcommand != '':
+		if sv.testcommand != '':
 			pprint('COMMAND START')
 		else:
-			if(sysvals.rtcwake):
+			if(sv.rtcwake):
 				pprint('SUSPEND START')
 			else:
 				pprint('SUSPEND START (press a key to resume)')
 		# set rtcwake
-		if(sysvals.rtcwake):
+		if(sv.rtcwake):
 			if not quiet:
-				pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
-			sysvals.rtcWakeAlarmOn()
+				pprint('will issue an rtcwake in %d seconds' % sv.rtcwaketime)
+			sv.dlog('enable RTC wake alarm')
+			sv.rtcWakeAlarmOn()
 		# start of suspend trace marker
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker')
+		if(sv.usecallgraph or sv.usetraceevents):
+			sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
 		# predelay delay
-		if(count == 1 and sysvals.predelay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker')
-			time.sleep(sysvals.predelay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count == 1 and sv.predelay > 0):
+			sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker')
+			time.sleep(sv.predelay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# initiate suspend or command
+		sv.dlog('system executing a suspend')
 		tdata = {'error': ''}
-		if sysvals.testcommand != '':
-			res = call(sysvals.testcommand+' 2>&1', shell=True);
+		if sv.testcommand != '':
+			res = call(sv.testcommand+' 2>&1', shell=True);
 			if res != 0:
 				tdata['error'] = 'cmd returned %d' % res
 		else:
-			mode = sysvals.suspendmode
-			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
+			mode = sv.suspendmode
+			if sv.memmode and os.path.exists(sv.mempowerfile):
 				mode = 'mem'
-				pf = open(sysvals.mempowerfile, 'w')
-				pf.write(sysvals.memmode)
-				pf.close()
-			if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile):
+				sv.testVal(sv.mempowerfile, 'radio', sv.memmode)
+			if sv.diskmode and os.path.exists(sv.diskpowerfile):
 				mode = 'disk'
-				pf = open(sysvals.diskpowerfile, 'w')
-				pf.write(sysvals.diskmode)
-				pf.close()
-			if mode == 'freeze' and sysvals.haveTurbostat():
+				sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode)
+			if sv.acpidebug:
+				sv.testVal(sv.acpipath, 'acpi', '0xe')
+			if mode == 'freeze' and sv.haveTurbostat():
 				# execution will pause here
-				turbo = sysvals.turbostat()
+				turbo = sv.turbostat()
 				if turbo:
 					tdata['turbo'] = turbo
 			else:
-				pf = open(sysvals.powerfile, 'w')
+				pf = open(sv.powerfile, 'w')
 				pf.write(mode)
 				# execution will pause here
 				try:
 					pf.close()
 				except Exception as e:
 					tdata['error'] = str(e)
-		if(sysvals.rtcwake):
-			sysvals.rtcWakeAlarmOff()
+		sv.dlog('system returned from resume')
+		# reset everything
+		sv.testVal('restoreall')
+		if(sv.rtcwake):
+			sv.dlog('disable RTC wake alarm')
+			sv.rtcWakeAlarmOff()
 		# postdelay delay
-		if(count == sysvals.execcount and sysvals.postdelay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.postdelay, 'trace_marker')
-			time.sleep(sysvals.postdelay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count == sv.execcount and sv.postdelay > 0):
+			sv.fsetVal('WAIT %d' % sv.postdelay, 'trace_marker')
+			time.sleep(sv.postdelay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# return from suspend
 		pprint('RESUME COMPLETE')
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker')
-		if sysvals.wifi and wifi:
-			tdata['wifi'] = sysvals.pollWifi(wifi)
-		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
+		if(sv.usecallgraph or sv.usetraceevents):
+			sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+		if sv.wifi and wifi:
+			tdata['wifi'] = sv.pollWifi(wifi)
+			sv.dlog('wifi check, %s' % tdata['wifi'])
+		if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
+			sv.dlog('read the ACPI FPDT')
 			tdata['fw'] = getFPDT(False)
 		testdata.append(tdata)
-	cmdafter = sysvals.cmdinfo(False)
+	sv.dlog('run the cmdinfo list after')
+	cmdafter = sv.cmdinfo(False)
 	# stop ftrace
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		if sysvals.useprocmon:
+	if(sv.usecallgraph or sv.usetraceevents):
+		if sv.useprocmon:
+			sv.dlog('stop the process monitor')
 			pm.stop()
-		sysvals.fsetVal('0', 'tracing_on')
+		sv.fsetVal('0', 'tracing_on')
 	# grab a copy of the dmesg output
 	if not quiet:
 		pprint('CAPTURING DMESG')
-	sysvals.getdmesg(testdata)
+	sysvals.dlog('EXECUTION TRACE END')
+	sv.getdmesg(testdata)
 	# grab a copy of the ftrace output
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
+	if(sv.usecallgraph or sv.usetraceevents):
 		if not quiet:
 			pprint('CAPTURING TRACE')
-		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
+		op = sv.writeDatafileHeader(sv.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
 			op.write(line)
 		op.close()
-		sysvals.fsetVal('', 'trace')
-		sysvals.platforminfo(cmdafter)
+		sv.fsetVal('', 'trace')
+		sv.platforminfo(cmdafter)
 
 def readFile(file):
 	if os.path.islink(file):
@@ -5586,39 +5659,6 @@ def dmidecode(mempath, fatal=False):
 		count += 1
 	return out
 
-def displayControl(cmd):
-	xset, ret = 'timeout 10 xset -d :0.0 {0}', 0
-	if sysvals.sudouser:
-		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
-	if cmd == 'init':
-		ret = call(xset.format('dpms 0 0 0'), shell=True)
-		if not ret:
-			ret = call(xset.format('s off'), shell=True)
-	elif cmd == 'reset':
-		ret = call(xset.format('s reset'), shell=True)
-	elif cmd in ['on', 'off', 'standby', 'suspend']:
-		b4 = displayControl('stat')
-		ret = call(xset.format('dpms force %s' % cmd), shell=True)
-		if not ret:
-			curr = displayControl('stat')
-			sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
-			if curr != cmd:
-				sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
-		if ret:
-			sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd)
-			return ret
-	elif cmd == 'stat':
-		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
-		ret = 'unknown'
-		for line in fp:
-			m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line))
-			if(m and len(m.group('m')) >= 2):
-				out = m.group('m').lower()
-				ret = out[3:] if out[0:2] == 'in' else out
-				break
-		fp.close()
-	return ret
-
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -6001,8 +6041,19 @@ def rerunTest(htmlfile=''):
 #	 execute a suspend/resume, gather the logs, and generate the output
 def runTest(n=0, quiet=False):
 	# prepare for the test
-	sysvals.initFtrace(quiet)
 	sysvals.initTestOutput('suspend')
+	op = sysvals.writeDatafileHeader(sysvals.dmesgfile, [])
+	op.write('# EXECUTION TRACE START\n')
+	op.close()
+	if n <= 1:
+		if sysvals.rs != 0:
+			sysvals.dlog('%sabling runtime suspend' % ('en' if sysvals.rs > 0 else 'dis'))
+			sysvals.setRuntimeSuspend(True)
+		if sysvals.display:
+			ret = sysvals.displayControl('init')
+			sysvals.dlog('xset display init, ret = %d' % ret)
+	sysvals.dlog('initialize ftrace')
+	sysvals.initFtrace(quiet)
 
 	# execute the test
 	executeSuspend(quiet)
@@ -6098,8 +6149,16 @@ def data_from_html(file, outpath, issues, fulldetail=False):
 	if wifi:
 		extra['wifi'] = wifi
 	low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
-	if low and 'waking' in low:
-		issue = 'FREEZEWAKE'
+	for lowstr in ['waking', '+']:
+		if not low:
+			break
+		if lowstr not in low:
+			continue
+		if lowstr == '+':
+			issue = 'S2LOOPx%d' % len(low.split('+'))
+		else:
+			m = re.match('.*waking *(?P<n>[0-9]*) *times.*', low)
+			issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN'
 		match = [i for i in issues if i['match'] == issue]
 		if len(match) > 0:
 			match[0]['count'] += 1
@@ -6605,6 +6664,11 @@ if __name__ == '__main__':
 				val = next(args)
 			except:
 				doError('-info requires one string argument', True)
+		elif(arg == '-desc'):
+			try:
+				val = next(args)
+			except:
+				doError('-desc requires one string argument', True)
 		elif(arg == '-rs'):
 			try:
 				val = next(args)
@@ -6814,9 +6878,9 @@ if __name__ == '__main__':
 			runSummary(sysvals.outdir, True, genhtml)
 		elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']):
 			sysvals.verbose = True
-			ret = displayControl(cmd[1:])
+			ret = sysvals.displayControl(cmd[1:])
 		elif(cmd == 'xstat'):
-			pprint('Display Status: %s' % displayControl('stat').upper())
+			pprint('Display Status: %s' % sysvals.displayControl('stat').upper())
 		elif(cmd == 'wificheck'):
 			dev = sysvals.checkWifi()
 			if dev:
@@ -6854,12 +6918,8 @@ if __name__ == '__main__':
 	if mode.startswith('disk-'):
 		sysvals.diskmode = mode.split('-', 1)[-1]
 		sysvals.suspendmode = 'disk'
-
 	sysvals.systemInfo(dmidecode(sysvals.mempath))
 
-	setRuntimeSuspend(True)
-	if sysvals.display:
-		displayControl('init')
 	failcnt, ret = 0, 0
 	if sysvals.multitest['run']:
 		# run multiple tests in a separate subdirectory
@@ -6900,7 +6960,10 @@ if __name__ == '__main__':
 			sysvals.testdir = sysvals.outdir
 		# run the test in the current directory
 		ret = runTest()
+
+	# reset to default values after testing
 	if sysvals.display:
-		displayControl('reset')
-	setRuntimeSuspend(False)
+		sysvals.displayControl('reset')
+	if sysvals.rs != 0:
+		sysvals.setRuntimeSuspend(False)
 	sys.exit(ret)
-- 
cgit v1.2.3


From 93035242d9e22f2aad6ac0b886f19444713c0089 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 15 Nov 2020 14:06:50 +0200
Subject: tools/testing/scatterlist: Test dynamic __sg_alloc_table_from_pages

Add few cases to test the dynamic allocation flow of
__sg_alloc_table_from_pages.

Link: https://lore.kernel.org/r/20201115120650.139277-1-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 tools/testing/scatterlist/main.c | 64 +++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index f561aed7c657..2d27e47bbf2f 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -9,6 +9,7 @@ struct test {
 	int alloc_ret;
 	unsigned num_pages;
 	unsigned *pfn;
+	unsigned *pfn_app;
 	unsigned size;
 	unsigned int max_seg;
 	unsigned int expected_segments;
@@ -52,31 +53,39 @@ int main(void)
 {
 	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
 	struct test *test, tests[] = {
-		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
-		{ 0, 1, pfn(0), 1, sgmax, 1 },
-		{ 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
-		{ 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
-		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
-		{ 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 0, NULL, 0, 0, 0 },
+		{ -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 },
+		{ 0, 1, pfn(0), NULL, 1, sgmax, 1 },
+		{ 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE,
+		  3 },
+		{ 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE,
+		  2 * PAGE_SIZE, 3 },
+		{ 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE,
+		  2 * PAGE_SIZE, 4 },
+		{ 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12),
+		  6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 },
+		{ 0, 0, NULL, NULL, 0, 0, 0 },
 	};
 	unsigned int i;
 
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
+		int left_pages = test->pfn_app ? test->num_pages : 0;
 		struct page *pages[MAX_PAGES];
 		struct sg_table st;
 		struct scatterlist *sg;
@@ -84,14 +93,23 @@ int main(void)
 		set_pages(pages, test->pfn, test->num_pages);
 
 		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
-				test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+				test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
 		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
 
 		if (test->alloc_ret)
 			continue;
 
+		if (test->pfn_app) {
+			set_pages(pages, test->pfn_app, test->num_pages);
+			sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+					test->size, test->max_seg, sg, 0, GFP_KERNEL);
+
+			assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+		}
+
 		VALIDATE(st.nents == test->expected_segments, &st, test);
-		VALIDATE(st.orig_nents == test->expected_segments, &st, test);
+		if (!test->pfn_app)
+			VALIDATE(st.orig_nents == test->expected_segments, &st, test);
 
 		sg_free_table(&st);
 	}
-- 
cgit v1.2.3


From 716572b0003ef67a4889bd7d85baf5099c5a0248 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 2 Nov 2020 11:51:10 -0800
Subject: selftests/x86/fsgsbase: Fix GS == 1, 2, and 3 tests

Setting GS to 1, 2, or 3 causes a nonsensical part of the IRET microcode
to change GS back to zero on a return from kernel mode to user mode. The
result is that these tests fail randomly depending on when interrupts
happen. Detect when this happens and let the test pass.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/7567fd44a1d60a9424f25b19a998f12149993b0d.1604346596.git.luto@kernel.org
---
 tools/testing/selftests/x86/fsgsbase.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 7161cfc2e60b..8c780cce941d 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -392,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local,
 		local = read_base(GS);
 
 		/*
-		 * Signal delivery seems to mess up weird selectors.  Put it
-		 * back.
+		 * Signal delivery is quite likely to change a selector
+		 * of 1, 2, or 3 back to 0 due to IRET being defective.
 		 */
 		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
 	} else {
@@ -411,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local,
 	if (base == local && sel_pre_sched == sel_post_sched) {
 		printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
 		       sel_pre_sched, local);
+	} else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
+		   sel_post_sched == 0) {
+		/*
+		 * IRET is misdesigned and will squash selectors 1, 2, or 3
+		 * to zero.  Don't fail the test just because this happened.
+		 */
+		printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
+		       sel_pre_sched, local, sel_post_sched, base);
 	} else {
 		nerrs++;
 		printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
-- 
cgit v1.2.3


From aeaaf005da1de075929e56562dced4a58238efc4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 2 Nov 2020 11:51:11 -0800
Subject: selftests/x86: Add missing .note.GNU-stack sections

Several of the x86 selftests end up with executable stacks because
the asm was missing the annotation that says that they are modern
and don't need executable stacks.  Add the annotations.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/6f043c03e9e0e4557e1e975a63b07a4d18965a68.1604346596.git.luto@kernel.org
---
 tools/testing/selftests/x86/raw_syscall_helper_32.S | 2 ++
 tools/testing/selftests/x86/thunks.S                | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
index 94410fa2b5ed..a10d36afdca0 100644
--- a/tools/testing/selftests/x86/raw_syscall_helper_32.S
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -45,3 +45,5 @@ int80_and_ret:
 
 	.type int80_and_ret, @function
 	.size int80_and_ret, .-int80_and_ret
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
index 1bb5d62c16a4..a2d47d8344d4 100644
--- a/tools/testing/selftests/x86/thunks.S
+++ b/tools/testing/selftests/x86/thunks.S
@@ -57,3 +57,5 @@ call32_from_64:
 	ret
 
 .size call32_from_64, .-call32_from_64
+
+.section .note.GNU-stack,"",%progbits
-- 
cgit v1.2.3


From 24eb2a02a68c98d46878214f46f855d934dc73ff Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:26 +0200
Subject: selftests: mlxsw: Add blackhole nexthop configuration tests

Test the mlxsw allows blackhole nexthops to be installed and that the
nexthops are marked as offloaded.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 25 +++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5de47d72f8c9..a2eff5f58209 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
 	nexthop_obj_invalid_test
 	nexthop_obj_offload_test
 	nexthop_obj_group_offload_test
+	nexthop_obj_blackhole_offload_test
 	nexthop_obj_route_offload_test
 	devlink_reload_test
 "
@@ -693,9 +694,6 @@ nexthop_obj_invalid_test()
 	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
 	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
 
-	ip nexthop add id 1 blackhole
-	check_fail $? "managed to configure a blackhole nexthop when should not"
-
 	ip nexthop add id 1 dev $swp1
 	ip nexthop add id 2 dev $swp1
 	ip nexthop add id 10 group 1/2
@@ -817,6 +815,27 @@ nexthop_obj_group_offload_test()
 	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
 }
 
+nexthop_obj_blackhole_offload_test()
+{
+	# Test offload indication of blackhole nexthop objects
+	RET=0
+
+	ip nexthop add id 1 blackhole
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "Blackhole nexthop not marked as offloaded when should"
+
+	ip nexthop add id 10 group 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "Nexthop group not marked as offloaded when should"
+
+	log_test "blackhole nexthop objects offload indication"
+
+	ip nexthop del id 10
+	ip nexthop del id 1
+}
+
 nexthop_obj_route_offload_test()
 {
 	# Test offload indication of routes using nexthop objects
-- 
cgit v1.2.3


From 1beaff779f783268dcc61c0a8fbd16e9c8b0abfa Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:27 +0200
Subject: selftests: forwarding: Add blackhole nexthops tests

Test that IPv4 and IPv6 ping fail when the route is using a blackhole
nexthop or a group with a blackhole nexthop. Test that ping passes when
the route starts using a valid nexthop.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/router_mpath_nh.sh    | 58 +++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index e8c2573d5232..388e4492b81b 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,7 +1,13 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+	ping_ipv4_blackhole
+	ping_ipv6_blackhole
+"
 NUM_NETIFS=8
 source lib.sh
 
@@ -302,6 +308,56 @@ multipath_test()
 	multipath6_l4_test "Weighted MP 11:45" 11 45
 }
 
+ping_ipv4_blackhole()
+{
+	RET=0
+
+	ip nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+	ping_do $h1 198.51.100.2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv4 blackhole ping"
+
+	ip nexthop del id 1002
+	ip nexthop del id 1001
+}
+
+ping_ipv6_blackhole()
+{
+	RET=0
+
+	ip -6 nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+	ping6_do $h1 2001:db8:2::2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv6 blackhole ping"
+
+	ip nexthop del id 1002
+	ip -6 nexthop del id 1001
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3


From 84e8feeadcf048903e65b7d82769c58576c506b0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:30 +0200
Subject: selftests: mlxsw: Add blackhole_nexthop trap test

Test that packets hitting a blackhole nexthop are trapped to the CPU
when the trap is enabled. Test that packets are not reported when the
trap is disabled.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/devlink_trap_l3_drops.sh     | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index f5abb1ebd392..4029833f7e27 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -52,6 +52,7 @@ ALL_TESTS="
 	blackhole_route_test
 	irif_disabled_test
 	erif_disabled_test
+	blackhole_nexthop_test
 "
 
 NUM_NETIFS=4
@@ -647,6 +648,41 @@ erif_disabled_test()
 	devlink_trap_action_set $trap_name "drop"
 }
 
+__blackhole_nexthop_test()
+{
+	local flags=$1; shift
+	local subnet=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local trap_name="blackhole_nexthop"
+	local mz_pid
+
+	RET=0
+
+	ip -$flags nexthop add id 1 blackhole
+	ip -$flags route add $subnet nhid 1
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto udp action drop
+
+	# Generate packets to the blackhole nexthop
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole nexthop: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del $subnet
+	ip -$flags nexthop del id 1
+}
+
+blackhole_nexthop_test()
+{
+	__blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+	__blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 05a98d7672731aeb5f9837b35cc7fe70444e70bd Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Sat, 21 Nov 2020 21:22:04 -0500
Subject: selftest/bpf: Fix link in readme

The link was bad because of invalid rst; it was pointing to itself and
was rendering badly.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201122022205.57229-1-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/README.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index ac9eda830187..3b8d8885892d 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -2,7 +2,10 @@
 BPF Selftest Notes
 ==================
 General instructions on running selftests can be found in
-`Documentation/bpf/bpf_devel_QA.rst`_.
+`Documentation/bpf/bpf_devel_QA.rst`__.
+
+__ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests
+
 
 Additional information about selftest failures are
 documented here.
-- 
cgit v1.2.3


From 1c26ac6ab3ce47ee2e6342373681dedbb97e21a3 Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Sat, 21 Nov 2020 21:22:05 -0500
Subject: selftest/bpf: Fix rst formatting in readme

A couple of places in the readme had invalid rst formatting causing the
rendering to be off. This patch fixes them with minimal edits.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201122022205.57229-2-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/README.rst | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3b8d8885892d..ca064180d4d0 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -33,11 +33,12 @@ The verifier will reject such code with above error.
 At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and
 the insn 20 undoes map_value addition. It is currently impossible for the
 verifier to understand such speculative pointer arithmetic.
-Hence
-    https://reviews.llvm.org/D85570
-addresses it on the compiler side. It was committed on llvm 12.
+Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
+
+__ https://reviews.llvm.org/D85570
 
 The corresponding C code
+
 .. code-block:: c
 
   for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
@@ -80,10 +81,11 @@ The symptom for ``bpf_iter/netlink`` looks like
   17: (7b) *(u64 *)(r7 +0) = r2
   only read is supported
 
-This is due to a llvm BPF backend bug. The fix 
-  https://reviews.llvm.org/D78466
+This is due to a llvm BPF backend bug. `The fix`__
 has been pushed to llvm 10.x release branch and will be
-available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
+
+__  https://reviews.llvm.org/D78466
 
 BPF CO-RE-based tests and Clang version
 =======================================
@@ -97,11 +99,11 @@ them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too
 old to support them, they shouldn't cause build failures or runtime test
 failures:
 
-  - __builtin_btf_type_id() ([0], [1], [2]);
-  - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+- __builtin_btf_type_id() [0_, 1_, 2_];
+- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
 
-  [0] https://reviews.llvm.org/D74572
-  [1] https://reviews.llvm.org/D74668
-  [2] https://reviews.llvm.org/D85174
-  [3] https://reviews.llvm.org/D83878
-  [4] https://reviews.llvm.org/D83242
+.. _0: https://reviews.llvm.org/D74572
+.. _1: https://reviews.llvm.org/D74668
+.. _2: https://reviews.llvm.org/D85174
+.. _3: https://reviews.llvm.org/D83878
+.. _4: https://reviews.llvm.org/D83242
-- 
cgit v1.2.3


From 68878a5c5b852d17f5827ce8a0f6fbd8b4cdfada Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 24 Nov 2020 18:41:00 +0800
Subject: bpftool: Fix error return value in build_btf_type_table

An appropriate return value should be set on the failed path.

Fixes: 4d374ba0bf30 ("tools: bpftool: implement "bpftool btf show|list"")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124104100.491-1-thunder.leizhen@huawei.com
---
 tools/bpf/bpftool/btf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 8ab142ff5eac..2afb7d5b1aca 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type,
 		obj_node = calloc(1, sizeof(*obj_node));
 		if (!obj_node) {
 			p_err("failed to allocate memory: %s", strerror(errno));
+			err = -ENOMEM;
 			goto err_free;
 		}
 
-- 
cgit v1.2.3


From db13db9f67fe5049159a05e870daedcee5879f8d Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 24 Nov 2020 15:21:14 +0800
Subject: libbpf: Add support for canceling cached_cons advance

Add a new function for returning descriptors the user received
after an xsk_ring_cons__peek call. After the application has
gotten a number of descriptors from a ring, it might not be able
to or want to process them all for various reasons. Therefore,
it would be useful to have an interface for returning or
cancelling a number of them so that they are returned to the ring.

This patch adds a new function called xsk_ring_cons__cancel that
performs this operation on nb descriptors counted from the end of
the batch of descriptors that was received through the peek call.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
[ Magnus Karlsson: rewrote changelog ]
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/1606202474-8119-1-git-send-email-lirongqing@baidu.com
---
 tools/lib/bpf/xsk.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1069c46364ff..1719a327e5f9 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -153,6 +153,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 	return entries;
 }
 
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons,
+					 size_t nb)
+{
+	cons->cached_cons -= nb;
+}
+
 static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
 {
 	/* Make sure data has been read before indicating we are done
-- 
cgit v1.2.3


From 27672f0d280a3f286a410a8db2004f46ace72a17 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 24 Nov 2020 15:12:09 +0000
Subject: bpf: Add a BPF helper for getting the IMA hash of an inode

Provide a wrapper function to get the IMA hash of an inode. This helper
is useful in fingerprinting files (e.g executables on execution) and
using these fingerprints in detections like an executable unlinking
itself.

Since the ima_inode_hash can sleep, it's only allowed for sleepable
LSM hooks.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124151210.1081188-3-kpsingh@chromium.org
---
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ca6146f001a..c3458ec1f30a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3807,6 +3807,16 @@ union bpf_attr {
  * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
  * 	Return
  * 		Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *	Description
+ *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3970,6 +3980,7 @@ union bpf_attr {
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
+	FN(ima_inode_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 34b82d3ac1058653b3de7be4697b55f67533b1f1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 24 Nov 2020 15:12:10 +0000
Subject: bpf: Add a selftest for bpf_ima_inode_hash

The test does the following:

- Mounts a loopback filesystem and appends the IMA policy to measure
  executions only on this file-system. Restricting the IMA policy to
  a particular filesystem prevents a system-wide IMA policy change.
- Executes an executable copied to this loopback filesystem.
- Calls the bpf_ima_inode_hash in the bprm_committed_creds hook and
  checks if the call succeeded and checks if a hash was calculated.

The test shells out to the added ima_setup.sh script as the setup is
better handled in a shell script and is more complicated to do in the
test program or even shelling out individual commands from C.

The list of required configs (i.e. IMA, SECURITYFS,
IMA_{WRITE,READ}_POLICY) for running this test are also updated.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com> (limit policy rule to loopback mount)
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124151210.1081188-4-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/config                |  4 ++
 tools/testing/selftests/bpf/ima_setup.sh          | 80 +++++++++++++++++++++++
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 74 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/ima.c           | 28 ++++++++
 4 files changed, 186 insertions(+)
 create mode 100755 tools/testing/selftests/bpf/ima_setup.sh
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_ima.c
 create mode 100644 tools/testing/selftests/bpf/progs/ima.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 2118e23ac07a..365bf9771b07 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -39,3 +39,7 @@ CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
 CONFIG_LIRC=y
+CONFIG_IMA=y
+CONFIG_SECURITYFS=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_READ_POLICY=y
diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
new file mode 100755
index 000000000000..15490ccc5e55
--- /dev/null
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
+TEST_BINARY="/bin/true"
+
+usage()
+{
+        echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+        exit 1
+}
+
+setup()
+{
+        local tmp_dir="$1"
+        local mount_img="${tmp_dir}/test.img"
+        local mount_dir="${tmp_dir}/mnt"
+        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+        mkdir -p ${mount_dir}
+
+        dd if=/dev/zero of="${mount_img}" bs=1M count=10
+
+        local loop_device="$(losetup --find --show ${mount_img})"
+
+        mkfs.ext4 "${loop_device}"
+        mount "${loop_device}" "${mount_dir}"
+
+        cp "${TEST_BINARY}" "${mount_dir}"
+        local mount_uuid="$(blkid -s UUID -o value ${loop_device})"
+        echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+}
+
+cleanup() {
+        local tmp_dir="$1"
+        local mount_img="${tmp_dir}/test.img"
+        local mount_dir="${tmp_dir}/mnt"
+
+        local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings)
+        for loop_dev in "${loop_devices}"; do
+                losetup -d $loop_dev
+        done
+
+        umount ${mount_dir}
+        rm -rf ${tmp_dir}
+}
+
+run()
+{
+        local tmp_dir="$1"
+        local mount_dir="${tmp_dir}/mnt"
+        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+        exec "${copied_bin_path}"
+}
+
+main()
+{
+        [[ $# -ne 2 ]] && usage
+
+        local action="$1"
+        local tmp_dir="$2"
+
+        [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+        if [[ "${action}" == "setup" ]]; then
+                setup "${tmp_dir}"
+        elif [[ "${action}" == "cleanup" ]]; then
+                cleanup "${tmp_dir}"
+        elif [[ "${action}" == "run" ]]; then
+                run "${tmp_dir}"
+        else
+                echo "Unknown action: ${action}"
+                exit 1
+        fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
new file mode 100644
index 000000000000..61fca681d524
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <test_progs.h>
+
+#include "ima.skel.h"
+
+static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+{
+	int child_pid, child_status;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		*monitored_pid = getpid();
+		execlp("./ima_setup.sh", "./ima_setup.sh", "run", measured_dir,
+		       NULL);
+		exit(errno);
+
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
+
+void test_test_ima(void)
+{
+	char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
+	const char *measured_dir;
+	char cmd[256];
+
+	int err, duration = 0;
+	struct ima *skel = NULL;
+
+	skel = ima__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+		goto close_prog;
+
+	err = ima__attach(skel);
+	if (CHECK(err, "attach", "attach failed: %d\n", err))
+		goto close_prog;
+
+	measured_dir = mkdtemp(measured_dir_template);
+	if (CHECK(measured_dir == NULL, "mkdtemp", "err %d\n", errno))
+		goto close_prog;
+
+	snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
+	if (CHECK_FAIL(system(cmd)))
+		goto close_clean;
+
+	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+	if (CHECK(err, "run_measured_process", "err = %d\n", err))
+		goto close_clean;
+
+	CHECK(skel->data->ima_hash_ret < 0, "ima_hash_ret",
+	      "ima_hash_ret = %ld\n", skel->data->ima_hash_ret);
+
+	CHECK(skel->bss->ima_hash == 0, "ima_hash",
+	      "ima_hash = %lu\n", skel->bss->ima_hash);
+
+close_clean:
+	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
+	CHECK_FAIL(system(cmd));
+close_prog:
+	ima__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
new file mode 100644
index 000000000000..86b21aff4bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+long ima_hash_ret = -1;
+u64 ima_hash = 0;
+u32 monitored_pid = 0;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bprm_committed_creds")
+int BPF_PROG(ima, struct linux_binprm *bprm)
+{
+	u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid == monitored_pid)
+		ima_hash_ret = bpf_ima_inode_hash(bprm->file->f_inode,
+						  &ima_hash, sizeof(ima_hash));
+
+	return 0;
+}
-- 
cgit v1.2.3


From fb3558127cb62ba2dea9e3d0efa1bb1d7e5eee2a Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Tue, 24 Nov 2020 22:52:55 -0500
Subject: bpf: Fix selftest compilation on clang 11

Before this patch, profiler.inc.h wouldn't compile with clang-11 (before
the __builtin_preserve_enum_value LLVM builtin was introduced in
https://reviews.llvm.org/D83242).

Another test that uses this builtin (test_core_enumval) is conditionally
skipped if the compiler is too old. In that spirit, this patch inhibits
part of populate_cgroup_info(), which needs this CO-RE builtin. The
selftests build again on clang-11.

The affected test (the profiler test) doesn't pass on clang-11 because
it's missing https://reviews.llvm.org/D85570, but at least the test suite
as a whole compiles. The test's expected failure is already called out in
the README.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Florian Lehner <dev@der-flo.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201125035255.17970-1-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/progs/profiler.inc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 30982a7e4d0f..4896fdf816f7 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -256,6 +256,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 
+#if __has_builtin(__builtin_preserve_enum_value)
 	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
 		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
 						  pids_cgrp_id___local);
@@ -275,6 +276,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 			}
 		}
 	}
+#endif
 
 	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
 	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
-- 
cgit v1.2.3


From 105c4e75feb411a60f5089f7a1e68b8523f986cc Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 26 Nov 2020 10:37:35 +0100
Subject: libbpf: Replace size_t with __u32 in xsk interfaces

Replace size_t with __u32 in the xsk interfaces that contain this.
There is no reason to have size_t since the internal variable that
is manipulated is a __u32. The following APIs are affected:

__u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
__u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)

The "nb" variable and the return values have been changed from size_t
to __u32.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1606383455-8243-1-git-send-email-magnus.karlsson@gmail.com
---
 tools/lib/bpf/xsk.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1719a327e5f9..5865e082ba0b 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -113,8 +113,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	return (entries > nb) ? nb : entries;
 }
 
-static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
-					    size_t nb, __u32 *idx)
+static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
 {
 	if (xsk_prod_nb_free(prod, nb) < nb)
 		return 0;
@@ -125,7 +124,7 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
 	return nb;
 }
 
-static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 {
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
@@ -135,10 +134,9 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
 	*prod->producer += nb;
 }
 
-static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
-					 size_t nb, __u32 *idx)
+static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
 {
-	size_t entries = xsk_cons_nb_avail(cons, nb);
+	__u32 entries = xsk_cons_nb_avail(cons, nb);
 
 	if (entries > 0) {
 		/* Make sure we do not speculatively read the data before
@@ -153,13 +151,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 	return entries;
 }
 
-static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons,
-					 size_t nb)
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
 {
 	cons->cached_cons -= nb;
 }
 
-static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 {
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
-- 
cgit v1.2.3


From 4f336e88a870ecc56832154dff22853a3ca33e24 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Tue, 24 Nov 2020 18:24:50 +0300
Subject: selftests/tls: add CHACHA20-POLY1305 to tls selftests

Add new cipher as a variant of standard tls selftests

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tls.c | 40 ++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b599f1fa99b5..cb0d1890a860 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -103,32 +103,58 @@ FIXTURE(tls)
 
 FIXTURE_VARIANT(tls)
 {
-	unsigned int tls_version;
+	u16 tls_version;
+	u16 cipher_type;
 };
 
-FIXTURE_VARIANT_ADD(tls, 12)
+FIXTURE_VARIANT_ADD(tls, 12_gcm)
 {
 	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
 };
 
-FIXTURE_VARIANT_ADD(tls, 13)
+FIXTURE_VARIANT_ADD(tls, 13_gcm)
 {
 	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
+};
+
+FIXTURE_VARIANT_ADD(tls, 12_chacha)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13_chacha)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
 };
 
 FIXTURE_SETUP(tls)
 {
-	struct tls12_crypto_info_aes_gcm_128 tls12;
+	union tls_crypto_context tls12;
 	struct sockaddr_in addr;
 	socklen_t len;
 	int sfd, ret;
+	size_t tls12_sz;
 
 	self->notls = false;
 	len = sizeof(addr);
 
 	memset(&tls12, 0, sizeof(tls12));
 	tls12.info.version = variant->tls_version;
-	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+	tls12.info.cipher_type = variant->cipher_type;
+	switch (variant->cipher_type) {
+	case TLS_CIPHER_CHACHA20_POLY1305:
+		tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305);
+		break;
+	case TLS_CIPHER_AES_GCM_128:
+		tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128);
+		break;
+	default:
+		tls12_sz = 0;
+	}
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -156,7 +182,7 @@ FIXTURE_SETUP(tls)
 
 	if (!self->notls) {
 		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
+				 tls12_sz);
 		ASSERT_EQ(ret, 0);
 	}
 
@@ -169,7 +195,7 @@ FIXTURE_SETUP(tls)
 		ASSERT_EQ(ret, 0);
 
 		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
+				 tls12_sz);
 		ASSERT_EQ(ret, 0);
 	}
 
-- 
cgit v1.2.3


From e14038a7ead09faa180eb072adc4a2157a0b475f Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 26 Nov 2020 19:47:47 +0100
Subject: selftests: tc-testing: enable CONFIG_NET_SCH_RED as a module

a proper kernel configuration for running kselftest can be obtained with:

 $ yes | make kselftest-merge

enable compile support for the 'red' qdisc: otherwise, tdc kselftest fail
when trying to run tdc test items contained in red.json.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/cfa23f2d4f672401e6cebca3a321dd1901a9ff07.1606416464.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/tc-testing/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c33a7aac27ff..b71828df5a6d 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -59,6 +59,7 @@ CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
 CONFIG_NET_SCH_FIFO=y
 CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_RED=m
 
 #
 ## Network testing
-- 
cgit v1.2.3


From f3ed003e64fe7faecbe4c34bd2a1f5571a23f05a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 26 Oct 2020 18:59:27 +0200
Subject: kunit: Introduce get_file_path() helper

Helper allows to derive file names depending on --build_dir argument.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 2e3cc0fac726..57c1724b7e5d 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -23,6 +23,11 @@ DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
 BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
 OUTFILE_PATH = 'test.log'
 
+def get_file_path(build_dir, default):
+	if build_dir:
+		default = os.path.join(build_dir, default)
+	return default
+
 class ConfigError(Exception):
 	"""Represents an error trying to configure the Linux kernel."""
 
@@ -97,9 +102,7 @@ class LinuxSourceTreeOperations(object):
 
 	def linux_bin(self, params, timeout, build_dir):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
-		linux_bin = './linux'
-		if build_dir:
-			linux_bin = os.path.join(build_dir, 'linux')
+		linux_bin = get_file_path(build_dir, 'linux')
 		outfile = get_outfile_path(build_dir)
 		with open(outfile, 'w') as output:
 			process = subprocess.Popen([linux_bin] + params,
@@ -108,22 +111,13 @@ class LinuxSourceTreeOperations(object):
 			process.wait(timeout)
 
 def get_kconfig_path(build_dir):
-	kconfig_path = KCONFIG_PATH
-	if build_dir:
-		kconfig_path = os.path.join(build_dir, KCONFIG_PATH)
-	return kconfig_path
+	return get_file_path(build_dir, KCONFIG_PATH)
 
 def get_kunitconfig_path(build_dir):
-	kunitconfig_path = KUNITCONFIG_PATH
-	if build_dir:
-		kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH)
-	return kunitconfig_path
+	return get_file_path(build_dir, KUNITCONFIG_PATH)
 
 def get_outfile_path(build_dir):
-	outfile_path = OUTFILE_PATH
-	if build_dir:
-		outfile_path = os.path.join(build_dir, OUTFILE_PATH)
-	return outfile_path
+	return get_file_path(build_dir, OUTFILE_PATH)
 
 class LinuxSourceTree(object):
 	"""Represents a Linux kernel source tree with KUnit tests."""
-- 
cgit v1.2.3


From 271e0c9dce1b02a825b3cc1a7aa1fab7c381d44b Mon Sep 17 00:00:00 2001
From: Libo Chen <libo.chen@oracle.com>
Date: Fri, 20 Nov 2020 18:12:43 -0800
Subject: ktest.pl: Fix incorrect reboot for grub2bls

This issue was first noticed when I was testing different kernels on
Oracle Linux 8 which as Fedora 30+ adopts BLS as default. Even though a
kernel entry was added successfully and the index of that kernel entry was
retrieved correctly, ktest still wouldn't reboot the system into
user-specified kernel.

The bug was spotted in subroutine reboot_to where the if-statement never
checks for REBOOT_TYPE "grub2bls", therefore the desired entry will not be
set for the next boot.

Add a check for "grub2bls" so that $grub_reboot $grub_number can
be run before a reboot if REBOOT_TYPE is "grub2bls" then we can boot to
the correct kernel.

Link: https://lkml.kernel.org/r/20201121021243.1532477-1-libo.chen@oracle.com

Cc: stable@vger.kernel.org
Fixes: ac2466456eaa ("ktest: introduce grub2bls REBOOT_TYPE option")
Signed-off-by: Libo Chen <libo.chen@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index cb16d2aac51c..54188ee16c48 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -2040,7 +2040,7 @@ sub reboot_to {
 
     if ($reboot_type eq "grub") {
 	run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
-    } elsif ($reboot_type eq "grub2") {
+    } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) {
 	run_ssh "$grub_reboot $grub_number";
     } elsif ($reboot_type eq "syslinux") {
 	run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
-- 
cgit v1.2.3


From 854055c0cf30d732b3514ce7956976f60496b1a1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 26 Nov 2020 18:49:46 +0000
Subject: selftests/bpf: Fix flavored variants of test_ima

Flavored variants of test_progs (e.g. test_progs-no_alu32) change their
working directory to the corresponding subdirectory (e.g. no_alu32).
Since the setup script required by test_ima (ima_setup.sh) is not
mentioned in the dependencies, it does not get copied to these
subdirectories and causes flavored variants of test_ima to fail.

Adding the script to TRUNNER_EXTRA_FILES ensures that the file is also
copied to the subdirectories for the flavored variants of test_progs.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Yonghong Song <yhs@fb.com>
Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201126184946.1708213-1-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3d5940cd110d..894192c319fb 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -389,6 +389,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
 			 btf_helpers.c	flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
+		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-- 
cgit v1.2.3


From e86843580d1bb1ce12544bca3115cf11d51603ff Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 20 Nov 2020 11:29:13 +0900
Subject: tools/bootconfig: Store size and checksum in footer as le32

Store the size and the checksum fields in the footer as le32
instead of u32. This will allow us to apply bootconfig to the
cross build initrd without caring the endianness.

Link: https://lkml.kernel.org/r/160583935332.547349.5897811300636587426.stgit@devnote2

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 4a445b6304bb..7362bef1a368 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
+#include <endian.h>
 
 #include <linux/kernel.h>
 #include <linux/bootconfig.h>
@@ -183,9 +184,11 @@ static int load_xbc_from_initrd(int fd, char **buf)
 
 	if (read(fd, &size, sizeof(u32)) < 0)
 		return pr_errno("Failed to read size", -errno);
+	size = le32toh(size);
 
 	if (read(fd, &csum, sizeof(u32)) < 0)
 		return pr_errno("Failed to read checksum", -errno);
+	csum = le32toh(csum);
 
 	/* Wrong size error  */
 	if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
@@ -407,10 +410,10 @@ static int apply_xbc(const char *path, const char *xbc_path)
 
 	/* Add a footer */
 	p = data + size;
-	*(u32 *)p = size;
+	*(u32 *)p = htole32(size);
 	p += sizeof(u32);
 
-	*(u32 *)p = csum;
+	*(u32 *)p = htole32(csum);
 	p += sizeof(u32);
 
 	memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-- 
cgit v1.2.3


From 0c7a7e1a8ff3fb6d01467b43c62760e6bf0afab3 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Mon, 9 Nov 2020 23:29:36 -0800
Subject: kunit: kunit_tool: Correctly parse diagnostic messages

Currently, kunit_tool expects all diagnostic lines in test results to
contain ": " somewhere, as both the subtest header and the crash report
do. Fix this to accept any line starting with (minus indent) "# " as
being a valid diagnostic line.

This matches what the TAP spec[1] and the draft KTAP spec[2] are
expecting.

[1]: http://testanything.org/tap-specification.html
[2]: https://lore.kernel.org/linux-kselftest/CY4PR13MB1175B804E31E502221BC8163FD830@CY4PR13MB1175.namprd13.prod.outlook.com/T/

Signed-off-by: David Gow <davidgow@google.com>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index bbfe1b4e4c1c..6614ec4d0898 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -135,8 +135,8 @@ def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
 	else:
 		return False
 
-SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$')
-DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!'
+SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$')
+DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$')
 
 def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	save_non_diagnositic(lines, test_case)
@@ -146,7 +146,8 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	match = SUBTEST_DIAGNOSTIC.match(line)
 	if match:
 		test_case.log.append(lines.pop(0))
-		if match.group(1) == DIAGNOSTIC_CRASH_MESSAGE:
+		crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line)
+		if crash_match:
 			test_case.status = TestStatus.TEST_CRASHED
 		return True
 	else:
-- 
cgit v1.2.3


From 008cb2ec4354fa1c4a166eca8e5eec15112847b3 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Sun, 29 Nov 2020 14:54:07 +0200
Subject: selftests: forwarding: Add QinQ veto testing

Test that each veto that was added in the previous patch, is indeed
vetoed.

$ ./q_in_q_veto.sh

TEST: create 802.1ad vlan upper on top of a front panel             [ OK ]
TEST: create 802.1ad vlan upper on top of a bridge port             [ OK ]
TEST: create 802.1ad vlan upper on top of a lag                     [ OK ]
TEST: create 802.1ad vlan upper on top 802.1q bridge                [ OK ]
TEST: create 802.1ad vlan upper on top 802.1ad bridge               [ OK ]
TEST: create 802.1q vlan upper on top 802.1ad bridge                [ OK ]
TEST: create vlan upper on top of front panel enslaved to 802.1ad bridge
[ OK ]
TEST: create vlan upper on top of lag enslaved to 802.1ad bridge    [ OK ]
TEST: enslave front panel with vlan upper to 802.1ad bridge         [ OK ]
TEST: enslave lag with vlan upper to 802.1ad bridge                 [ OK ]
TEST: IP address addition to 802.1ad bridge                         [ OK ]
TEST: switch bridge protocol                                        [ OK ]

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/q_in_q_veto.sh     | 296 +++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
new file mode 100755
index 000000000000..7edaed8eb86a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -0,0 +1,296 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	create_8021ad_vlan_upper_on_top_front_panel_port
+	create_8021ad_vlan_upper_on_top_bridge_port
+	create_8021ad_vlan_upper_on_top_lag
+	create_8021ad_vlan_upper_on_top_bridge
+	create_8021ad_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge
+	create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge
+	enslave_front_panel_with_vlan_upper_to_8021ad_bridge
+	enslave_lag_with_vlan_upper_to_8021ad_bridge
+	add_ip_address_to_8021ad_bridge
+	switch_bridge_protocol_from_8021q_to_8021ad
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	sleep 10
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vlan_upper_on_top_of_bridge()
+{
+	RET=0
+
+	local bridge_proto=$1; shift
+	local netdev_proto=$1; shift
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>/dev/null
+	check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected"
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack"
+
+	log_test "create $netdev_proto vlan upper on top $bridge_proto bridge"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_front_panel_port()
+{
+	RET=0
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a front panel"
+}
+
+create_8021ad_vlan_upper_on_top_bridge_port()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a bridge port"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_lag()
+{
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a lag not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a lag"
+
+	ip link del dev bond1
+}
+
+create_8021ad_vlan_upper_on_top_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad"
+}
+
+create_8021ad_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad"
+}
+
+create_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q"
+}
+
+create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link set dev bond1 master br0
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of lag enslaved to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100
+
+	ip link set dev $swp1 master br0 2>/dev/null
+	check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave front panel with vlan upper to 802.1ad bridge"
+
+	ip link del dev $swp1.100
+	ip link del dev br0
+}
+
+enslave_lag_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link add name bond1.100 link bond1 type vlan id 100
+
+	ip link set dev bond1 master br0 2>/dev/null
+	check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev bond1 master br0 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave lag with vlan upper to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+
+add_ip_address_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip addr add dev br0 192.0.2.17/28 2>/dev/null
+	check_fail $? "IP address addition to 802.1ad bridge not rejected"
+
+	ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "IP address addition to 802.1ad bridge rejected without extack"
+
+	log_test "IP address addition to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+switch_bridge_protocol_from_8021q_to_8021ad()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null
+	check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected"
+
+	log_test "switch bridge protocol"
+
+	ip link del dev br0
+}
+
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From f6a8250ea1e42ad1f4f3bab01c851ec5fd48f0e7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 30 Nov 2020 14:33:35 -0800
Subject: libbpf: Fix ring_buffer__poll() to return number of consumed samples

Fix ring_buffer__poll() to return the number of non-discarded records
consumed, just like its documentation states. It's also consistent with
ring_buffer__consume() return. Fix up selftests with wrong expected results.

Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support")
Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201130223336.904192-1-andrii@kernel.org
---
 tools/lib/bpf/ringbuf.c                                | 2 +-
 tools/testing/selftests/bpf/prog_tests/ringbuf.c       | 2 +-
 tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..98537ff2679e 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
 			return err;
-		res += cnt;
+		res += err;
 	}
 	return cnt < 0 ? -errno : res;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index c1650548433c..1a48c6f7f54e 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -217,7 +217,7 @@ void test_ringbuf(void)
 	if (CHECK(err, "join_bg", "err %d\n", err))
 		goto cleanup;
 
-	if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+	if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
 		goto cleanup;
 
 	/* 3 rounds, 2 samples each */
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 78e450609803..d37161e59bb2 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -81,7 +81,7 @@ void test_ringbuf_multi(void)
 
 	/* poll for samples, should get 2 ringbufs back */
 	err = ring_buffer__poll(ringbuf, -1);
-	if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+	if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err))
 		goto cleanup;
 
 	/* expect extra polling to return nothing */
-- 
cgit v1.2.3


From 156c9b70dbfb83eeeff39e9202eb5f8bb6d0fd04 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 30 Nov 2020 14:33:36 -0800
Subject: selftests/bpf: Drain ringbuf samples at the end of test

Avoid occasional test failures due to the last sample being delayed to
another ring_buffer__poll() call. Instead, drain samples completely with
ring_buffer__consume(). This is supposed to fix a rare and non-deterministic
test failure in libbpf CI.

Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201130223336.904192-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 1a48c6f7f54e..fddbc5db5d6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -220,6 +220,12 @@ void test_ringbuf(void)
 	if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
 		goto cleanup;
 
+	/* due to timing variations, there could still be non-notified
+	 * samples, so consume them here to collect all the samples
+	 */
+	err = ring_buffer__consume(ringbuf);
+	CHECK(err < 0, "rb_consume", "failed: %d\b", err);
+
 	/* 3 rounds, 2 samples each */
 	cnt = atomic_xchg(&sample_cnt, 0);
 	CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
-- 
cgit v1.2.3


From 179ef035992e89646e17138b18b130bb874b86bb Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:36 -0500
Subject: selftests: Add kselftest for syscall user dispatch

Implement functionality tests for syscall user dispatch.  In order to
make the test portable, refrain from open coding syscall dispatchers and
calculating glibc memory ranges.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-6-krisman@collabora.com
---
 tools/testing/selftests/Makefile                   |   1 +
 .../selftests/syscall_user_dispatch/.gitignore     |   3 +
 .../selftests/syscall_user_dispatch/Makefile       |   9 +
 .../testing/selftests/syscall_user_dispatch/config |   1 +
 .../selftests/syscall_user_dispatch/sud_test.c     | 310 +++++++++++++++++++++
 5 files changed, 324 insertions(+)
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/.gitignore
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/Makefile
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/config
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..96d5682fb9d8 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -56,6 +56,7 @@ TARGETS += sparc64
 TARGETS += splice
 TARGETS += static_keys
 TARGETS += sync
+TARGETS += syscall_user_dispatch
 TARGETS += sysctl
 TARGETS += tc-testing
 TARGETS += timens
diff --git a/tools/testing/selftests/syscall_user_dispatch/.gitignore b/tools/testing/selftests/syscall_user_dispatch/.gitignore
new file mode 100644
index 000000000000..f539615ad5da
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sud_test
+sud_benchmark
diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile
new file mode 100644
index 000000000000..8e15fa42bcda
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+top_srcdir = ../../../..
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+
+CFLAGS += -Wall -I$(LINUX_HDR_PATH)
+
+TEST_GEN_PROGS := sud_test
+include ../lib.mk
diff --git a/tools/testing/selftests/syscall_user_dispatch/config b/tools/testing/selftests/syscall_user_dispatch/config
new file mode 100644
index 000000000000..039e303e59d7
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/config
@@ -0,0 +1 @@
+CONFIG_GENERIC_ENTRY=y
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
new file mode 100644
index 000000000000..6498b050ef89
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Test code for syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <sys/prctl.h>
+#include <sys/sysinfo.h>
+#include <sys/syscall.h>
+#include <signal.h>
+
+#include <asm/unistd.h>
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF	0
+# define PR_SYS_DISPATCH_ON	1
+#endif
+
+#ifndef SYS_USER_DISPATCH
+# define SYS_USER_DISPATCH	2
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
+#endif
+
+#define SYSCALL_DISPATCH_ON(x) ((x) = 1)
+#define SYSCALL_DISPATCH_OFF(x) ((x) = 0)
+
+/* Test Summary:
+ *
+ * - dispatch_trigger_sigsys: Verify if PR_SET_SYSCALL_USER_DISPATCH is
+ *   able to trigger SIGSYS on a syscall.
+ *
+ * - bad_selector: Test that a bad selector value triggers SIGSYS with
+ *   si_errno EINVAL.
+ *
+ * - bad_prctl_param: Test that the API correctly rejects invalid
+ *   parameters on prctl
+ *
+ * - dispatch_and_return: Test that a syscall is selectively dispatched
+ *   to userspace depending on the value of selector.
+ *
+ * - disable_dispatch: Test that the PR_SYS_DISPATCH_OFF correctly
+ *   disables the dispatcher
+ *
+ * - direct_dispatch_range: Test that a syscall within the allowed range
+ *   can bypass the dispatcher.
+ */
+
+TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
+{
+	char sel = 0;
+	struct sysinfo info;
+	int ret;
+
+	ret = sysinfo(&info);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	SYSCALL_DISPATCH_ON(sel);
+
+	sysinfo(&info);
+
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+TEST(bad_prctl_param)
+{
+	char sel = 0;
+	int op;
+
+	/* Invalid op */
+	op = -1;
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
+	ASSERT_EQ(EINVAL, errno);
+
+	/* PR_SYS_DISPATCH_OFF */
+	op = PR_SYS_DISPATCH_OFF;
+
+	/* offset != 0 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* len != 0 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* sel != NULL */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Valid parameter */
+	errno = 0;
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
+	EXPECT_EQ(0, errno);
+
+	/* PR_SYS_DISPATCH_ON */
+	op = PR_SYS_DISPATCH_ON;
+
+	/* Dispatcher region is bad (offset > 0 && len == 0) */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Invalid selector */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
+	ASSERT_EQ(EFAULT, errno);
+
+	/*
+	 * Dispatcher range overflows unsigned long
+	 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
+	ASSERT_EQ(EINVAL, errno) {
+		TH_LOG("Should reject bad syscall range");
+	}
+
+	/*
+	 * Allowed range overflows usigned long
+	 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
+	ASSERT_EQ(EINVAL, errno) {
+		TH_LOG("Should reject bad syscall range");
+	}
+}
+
+/*
+ * Use global selector for handle_sigsys tests, to avoid passing
+ * selector to signal handler
+ */
+char glob_sel;
+int nr_syscalls_emulated;
+int si_code;
+int si_errno;
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+	si_code = info->si_code;
+	si_errno = info->si_errno;
+
+	if (info->si_syscall == MAGIC_SYSCALL_1)
+		nr_syscalls_emulated++;
+
+	/* In preparation for sigreturn. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+}
+
+TEST(dispatch_and_return)
+{
+	long ret;
+	struct sigaction act;
+	sigset_t mask;
+
+	glob_sel = 0;
+	nr_syscalls_emulated = 0;
+	si_code = 0;
+	si_errno = 0;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	/* Make sure selector is good prior to prctl. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	/* MAGIC_SYSCALL_1 doesn't exist. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+	ret = syscall(MAGIC_SYSCALL_1);
+	EXPECT_EQ(-1, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+
+	/* MAGIC_SYSCALL_1 should be emulated. */
+	nr_syscalls_emulated = 0;
+	SYSCALL_DISPATCH_ON(glob_sel);
+
+	ret = syscall(MAGIC_SYSCALL_1);
+	EXPECT_EQ(MAGIC_SYSCALL_1, ret) {
+		TH_LOG("Failed to intercept syscall");
+	}
+	EXPECT_EQ(1, nr_syscalls_emulated) {
+		TH_LOG("Failed to emulate syscall");
+	}
+	ASSERT_EQ(SYS_USER_DISPATCH, si_code) {
+		TH_LOG("Bad si_code in SIGSYS");
+	}
+	ASSERT_EQ(0, si_errno) {
+		TH_LOG("Bad si_errno in SIGSYS");
+	}
+}
+
+TEST_SIGNAL(bad_selector, SIGSYS)
+{
+	long ret;
+	struct sigaction act;
+	sigset_t mask;
+	struct sysinfo info;
+
+	glob_sel = 0;
+	nr_syscalls_emulated = 0;
+	si_code = 0;
+	si_errno = 0;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	/* Make sure selector is good prior to prctl. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	glob_sel = -1;
+
+	sysinfo(&info);
+
+	/* Even though it is ready to catch SIGSYS, the signal is
+	 * supposed to be uncatchable.
+	 */
+
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+TEST(disable_dispatch)
+{
+	int ret;
+	struct sysinfo info;
+	char sel = 0;
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	/* MAGIC_SYSCALL_1 doesn't exist. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF, 0, 0, 0);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Failed to unset syscall user dispatch");
+	}
+
+	/* Shouldn't have any effect... */
+	SYSCALL_DISPATCH_ON(glob_sel);
+
+	ret = syscall(__NR_sysinfo, &info);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+}
+
+TEST(direct_dispatch_range)
+{
+	int ret = 0;
+	struct sysinfo info;
+	char sel = 0;
+
+	/*
+	 * Instead of calculating libc addresses; allow the entire
+	 * memory map and lock the selector.
+	 */
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	SYSCALL_DISPATCH_ON(sel);
+
+	ret = sysinfo(&info);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+}
+
+TEST_HARNESS_MAIN
-- 
cgit v1.2.3


From d87ae0fa21c26db2d7c66f22dee9c27ecda48ce2 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:37 -0500
Subject: selftests: Add benchmark for syscall user dispatch

This is the patch I'm using to evaluate the impact syscall user dispatch
has on native syscall (syscalls not redirected to userspace) when
enabled for the process and submiting syscalls though the unblocked
dispatch selector. It works by running a step to define a baseline of
the cost of executing sysinfo, then enabling SUD, and rerunning that
step.

On my test machine, an AMD Ryzen 5 1500X, I have the following results
with the latest version of syscall user dispatch patches.

root@olga:~# syscall_user_dispatch/sud_benchmark
  Calibrating test set to last ~5 seconds...
  test iterations = 37500000
  Avg syscall time 134ns.
  Caught sys_ff00
  trapped_call_count 1, native_call_count 0.
  Avg syscall time 147ns.
  Interception overhead: 9.7% (+13ns).

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-7-krisman@collabora.com
---
 .../selftests/syscall_user_dispatch/Makefile       |   2 +-
 .../syscall_user_dispatch/sud_benchmark.c          | 200 +++++++++++++++++++++
 2 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c

(limited to 'tools')

diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile
index 8e15fa42bcda..03c120270953 100644
--- a/tools/testing/selftests/syscall_user_dispatch/Makefile
+++ b/tools/testing/selftests/syscall_user_dispatch/Makefile
@@ -5,5 +5,5 @@ LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 
 CFLAGS += -Wall -I$(LINUX_HDR_PATH)
 
-TEST_GEN_PROGS := sud_test
+TEST_GEN_PROGS := sud_test sud_benchmark
 include ../lib.mk
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
new file mode 100644
index 000000000000..6689f1183dbf
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Benchmark and test syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF	0
+# define PR_SYS_DISPATCH_ON	1
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
+#endif
+
+/*
+ * To test returning from a sigsys with selector blocked, the test
+ * requires some per-architecture support (i.e. knowledge about the
+ * signal trampoline address).  On i386, we know it is on the vdso, and
+ * a small trampoline is open-coded for x86_64.  Other architectures
+ * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
+ * out of the box, but don't enable them until they support syscall user
+ * dispatch.
+ */
+#if defined(__x86_64__) || defined(__i386__)
+#define TEST_BLOCKED_RETURN
+#endif
+
+#ifdef __x86_64__
+void* (syscall_dispatcher_start)(void);
+void* (syscall_dispatcher_end)(void);
+#else
+unsigned long syscall_dispatcher_start = 0;
+unsigned long syscall_dispatcher_end = 0;
+#endif
+
+unsigned long trapped_call_count = 0;
+unsigned long native_call_count = 0;
+
+char selector;
+#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
+#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+
+#define CALIBRATION_STEP 100000
+#define CALIBRATE_TO_SECS 5
+int factor;
+
+static double one_sysinfo_step(void)
+{
+	struct timespec t1, t2;
+	int i;
+	struct sysinfo info;
+
+	clock_gettime(CLOCK_MONOTONIC, &t1);
+	for (i = 0; i < CALIBRATION_STEP; i++)
+		sysinfo(&info);
+	clock_gettime(CLOCK_MONOTONIC, &t2);
+	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
+}
+
+static void calibrate_set(void)
+{
+	double elapsed = 0;
+
+	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
+
+	while (elapsed < 1) {
+		elapsed += one_sysinfo_step();
+		factor += CALIBRATE_TO_SECS;
+	}
+
+	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
+}
+
+static double perf_syscall(void)
+{
+	unsigned int i;
+	double partial = 0;
+
+	for (i = 0; i < factor; ++i)
+		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
+	return partial;
+}
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+	char buf[1024];
+	int len;
+
+	SYSCALL_UNBLOCK;
+
+	/* printf and friends are not signal-safe. */
+	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
+	write(1, buf, len);
+
+	if (info->si_syscall == MAGIC_SYSCALL_1)
+		trapped_call_count++;
+	else
+		native_call_count++;
+
+#ifdef TEST_BLOCKED_RETURN
+	SYSCALL_BLOCK;
+#endif
+
+#ifdef __x86_64__
+	__asm__ volatile("movq $0xf, %rax");
+	__asm__ volatile("leaveq");
+	__asm__ volatile("add $0x8, %rsp");
+	__asm__ volatile("syscall_dispatcher_start:");
+	__asm__ volatile("syscall");
+	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
+	__asm__ volatile("syscall_dispatcher_end:");
+#endif
+
+}
+
+int main(void)
+{
+	struct sigaction act;
+	double time1, time2;
+	int ret;
+	sigset_t mask;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	calibrate_set();
+
+	time1 = perf_syscall();
+	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	if (ret) {
+		perror("Error sigaction:");
+		exit(-1);
+	}
+
+	fprintf(stderr, "Enabling syscall trapping.\n");
+
+	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
+		  syscall_dispatcher_start,
+		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
+		  &selector)) {
+		perror("prctl failed\n");
+		exit(-1);
+	}
+
+	SYSCALL_BLOCK;
+	syscall(MAGIC_SYSCALL_1);
+
+#ifdef TEST_BLOCKED_RETURN
+	if (selector == PR_SYS_DISPATCH_OFF) {
+		fprintf(stderr, "Failed to return with selector blocked.\n");
+		exit(-1);
+	}
+#endif
+
+	SYSCALL_UNBLOCK;
+
+	if (!trapped_call_count) {
+		fprintf(stderr, "syscall trapping does not work.\n");
+		exit(-1);
+	}
+
+	time2 = perf_syscall();
+
+	if (native_call_count) {
+		perror("syscall trapping intercepted more syscalls than expected\n");
+		exit(-1);
+	}
+
+	printf("trapped_call_count %lu, native_call_count %lu.\n",
+	       trapped_call_count, native_call_count);
+	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
+	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
+	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
+	return 0;
+
+}
-- 
cgit v1.2.3


From 2c07343abd8932200a45ff7b10950e71081e9e77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@linux.microsoft.com>
Date: Wed, 2 Dec 2020 17:26:43 +0100
Subject: selftests/seccomp: Update kernel config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

seccomp_bpf.c uses unshare(CLONE_NEWPID), which requires CONFIG_PID_NS
to be set.

Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201202162643.249276-1-mic@digikod.net
---
 tools/testing/selftests/seccomp/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
index 64c19d8eba79..ad431a5178fb 100644
--- a/tools/testing/selftests/seccomp/config
+++ b/tools/testing/selftests/seccomp/config
@@ -1,3 +1,4 @@
+CONFIG_PID_NS=y
 CONFIG_SECCOMP=y
 CONFIG_SECCOMP_FILTER=y
 CONFIG_USER_NS=y
-- 
cgit v1.2.3


From a999696c547f1a8ef2ddbb9b0e77abc3f6db4ff1 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:25:14 -0800
Subject: selftests/bpf: Rewrite test_sock_addr bind bpf into C

I'm planning to extend it in the next patches. It's much easier to
work with C than BPF assembly.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202172516.3483656-2-sdf@google.com
---
 tools/testing/selftests/bpf/progs/bind4_prog.c |  71 +++++++++
 tools/testing/selftests/bpf/progs/bind6_prog.c |  88 +++++++++++
 tools/testing/selftests/bpf/test_sock_addr.c   | 196 ++-----------------------
 3 files changed, 171 insertions(+), 184 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
new file mode 100644
index 000000000000..0951302a984a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV4_IP		0xc0a801feU /* 192.168.1.254 */
+#define SERV4_PORT		4040
+#define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
+#define SERV4_REWRITE_PORT	4444
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip4;
+	__u16 user_port;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip4 != bpf_htonl(SERV4_IP) ||
+	    ctx->user_port != bpf_htons(SERV4_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
+	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
new file mode 100644
index 000000000000..16da1cf85418
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV6_IP_0		0xfaceb00c /* face:b00c:1234:5678::abcd */
+#define SERV6_IP_1		0x12345678
+#define SERV6_IP_2		0x00000000
+#define SERV6_IP_3		0x0000abcd
+#define SERV6_PORT		6060
+#define SERV6_REWRITE_IP_0	0x00000000
+#define SERV6_REWRITE_IP_1	0x00000000
+#define SERV6_REWRITE_IP_2	0x00000000
+#define SERV6_REWRITE_IP_3	0x00000001
+#define SERV6_REWRITE_PORT	6666
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip6;
+	__u16 user_port;
+	int i;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET6)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip6[0] != bpf_htonl(SERV6_IP_0) ||
+	    ctx->user_ip6[1] != bpf_htonl(SERV6_IP_1) ||
+	    ctx->user_ip6[2] != bpf_htonl(SERV6_IP_2) ||
+	    ctx->user_ip6[3] != bpf_htonl(SERV6_IP_3) ||
+	    ctx->user_port != bpf_htons(SERV6_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
+	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
+	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
+	ctx->user_ip6[3] = bpf_htonl(SERV6_REWRITE_IP_3);
+	ctx->user_port = bpf_htons(SERV6_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index b8c72c1d9cf7..dcb83ab02919 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -31,6 +31,8 @@
 #define CONNECT6_PROG_PATH	"./connect6_prog.o"
 #define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
 #define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
+#define BIND4_PROG_PATH		"./bind4_prog.o"
+#define BIND6_PROG_PATH		"./bind6_prog.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
@@ -660,190 +662,6 @@ static int load_insns(const struct sock_addr_test *test,
 	return ret;
 }
 
-/* [1] These testing programs try to read different context fields, including
- * narrow loads of different sizes from user_ip4 and user_ip6, and write to
- * those allowed to be overridden.
- *
- * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
- * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
- * in such cases since it accepts only _signed_ 32bit integer as IMM
- * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
- * to count jumps properly.
- */
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
-	union {
-		uint8_t u4_addr8[4];
-		uint16_t u4_addr16[2];
-		uint32_t u4_addr32;
-	} ip4, port;
-	struct sockaddr_in addr4_rw;
-
-	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
-		log_err("Invalid IPv4: %s", SERV4_IP);
-		return -1;
-	}
-
-	port.u4_addr32 = htons(SERV4_PORT);
-
-	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
-			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
-
-		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, type)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
-		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
-
-		/*     1st_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
-
-		/*     2nd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
-
-		/*     3rd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
-
-		/*     4th_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
-
-		/*     1st_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
-
-		/*     2nd_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
-
-		/*     whole_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
-
-		/*     1st_byte_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
-
-		/*     1st_half_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
-
-		/*     user_port == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
-
-		/*      user_ip4 = addr4_rw.sin_addr */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-
-		/*      user_port = addr4_rw.sin_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
-	struct sockaddr_in6 addr6_rw;
-	struct in6_addr ip6;
-
-	if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
-		log_err("Invalid IPv6: %s", SERV6_IP);
-		return -1;
-	}
-
-	if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
-			(struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET6 && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-		/*            5th_byte_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
-
-		/*            3rd_half_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
-
-		/*            last_word_of_user_ip6 == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[3])),
-		BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]),  /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
-
-
-#define STORE_IPV6_WORD(N)						       \
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]),     \
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
-			    offsetof(struct bpf_sock_addr, user_ip6[N]))
-
-		/*      user_ip6 = addr6_rw.sin6_addr */
-		STORE_IPV6_WORD(0),
-		STORE_IPV6_WORD(1),
-		STORE_IPV6_WORD(2),
-		STORE_IPV6_WORD(3),
-
-		/*      user_port = addr6_rw.sin6_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
 static int load_path(const struct sock_addr_test *test, const char *path)
 {
 	struct bpf_prog_load_attr attr;
@@ -865,6 +683,16 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 	return prog_fd;
 }
 
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND4_PROG_PATH);
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND6_PROG_PATH);
+}
+
 static int connect4_prog_load(const struct sock_addr_test *test)
 {
 	return load_path(test, CONNECT4_PROG_PATH);
-- 
cgit v1.2.3


From a540c81a2bcb95227c3e24a4478956824858a6b0 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:25:16 -0800
Subject: selftests/bpf: Extend bind{4,6} programs with a call to
 bpf_setsockopt

To make sure it doesn't trigger sock_owned_by_me splat.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202172516.3483656-4-sdf@google.com
---
 tools/testing/selftests/bpf/progs/bind4_prog.c | 31 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bind6_prog.c | 31 ++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 0951302a984a..c6520f21f5f5 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -19,6 +19,33 @@
 #define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
 #define SERV4_REWRITE_PORT	4444
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind4")
 int bind_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -62,6 +89,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
 	if (ctx->user_ip4 != user_ip4)
 		return 0;
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
 	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
 
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 16da1cf85418..4358e44dcf47 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -25,6 +25,33 @@
 #define SERV6_REWRITE_IP_3	0x00000001
 #define SERV6_REWRITE_PORT	6666
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind6")
 int bind_v6_prog(struct bpf_sock_addr *ctx)
 {
@@ -76,6 +103,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
 			return 0;
 	}
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
 	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
 	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
-- 
cgit v1.2.3


From 80ee81e0403c48f4eb342f7c8d40477c89b8836a Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 1 Dec 2020 13:58:58 -0800
Subject: bpf: Eliminate rlimit-based memory accounting infra for bpf maps

Remove rlimit-based accounting infrastructure code, which is not used
anymore.

To provide a backward compatibility, use an approximation of the
bpf map memory footprint as a "memlock" value, available to a user
via map info. The approximation is based on the maximal number of
elements and key and value sizes.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201201215900.3569844-33-guro@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 2 +-
 tools/testing/selftests/bpf/progs/map_ptr_kern.c     | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index 08651b23edba..b83b5d2e17dc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -23,6 +23,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 
 	BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
 		       map->usercnt.counter,
-		       map->memory.user->locked_vm.counter);
+		       0LLU);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index c325405751e2..d8850bc6a9f1 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -26,17 +26,12 @@ __u32 g_line = 0;
 		return 0;	\
 })
 
-struct bpf_map_memory {
-	__u32 pages;
-} __attribute__((preserve_access_index));
-
 struct bpf_map {
 	enum bpf_map_type map_type;
 	__u32 key_size;
 	__u32 value_size;
 	__u32 max_entries;
 	__u32 id;
-	struct bpf_map_memory memory;
 } __attribute__((preserve_access_index));
 
 static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
@@ -47,7 +42,6 @@ static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
 	VERIFY(map->value_size == value_size);
 	VERIFY(map->max_entries == max_entries);
 	VERIFY(map->id > 0);
-	VERIFY(map->memory.pages > 0);
 
 	return 1;
 }
@@ -60,7 +54,6 @@ static inline int check_bpf_map_ptr(struct bpf_map *indirect,
 	VERIFY(indirect->value_size == direct->value_size);
 	VERIFY(indirect->max_entries == direct->max_entries);
 	VERIFY(indirect->id == direct->id);
-	VERIFY(indirect->memory.pages == direct->memory.pages);
 
 	return 1;
 }
-- 
cgit v1.2.3


From 4e62d55d77bbdb33d821f5e16306caab38d42267 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Wed, 28 Oct 2020 10:50:44 +1100
Subject: selftests: openat2: add RESOLVE_ conflict test

Now that we reject conflicting RESOLVE_ flags, add a selftest to avoid
regressions.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/r/20201027235044.5240-3-cyphar@cyphar.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/openat2/openat2_test.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index b386367c606b..381d874cce99 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
 	int err;
 };
 
-#define NUM_OPENAT2_FLAG_TESTS 23
+#define NUM_OPENAT2_FLAG_TESTS 24
 
 void test_openat2_flags(void)
 {
@@ -210,6 +210,12 @@ void test_openat2_flags(void)
 		  .how.flags = O_TMPFILE | O_RDWR,
 		  .how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
 
+		/* ->resolve flags must not conflict. */
+		{ .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
+		  .how.flags = O_RDONLY,
+		  .how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
+		  .err = -EINVAL },
+
 		/* ->resolve must only contain RESOLVE_* flags. */
 		{ .name = "invalid how.resolve and O_RDONLY",
 		  .how.flags = O_RDONLY,
-- 
cgit v1.2.3


From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001
From: Thomas Karlsson <thomas.karlsson@paneda.se>
Date: Wed, 2 Dec 2020 19:49:58 +0100
Subject: macvlan: Support for high multicast packet rate

Background:
Broadcast and multicast packages are enqueued for later processing.
This queue was previously hardcoded to 1000.

This proved insufficient for handling very high packet rates.
This resulted in packet drops for multicast.
While at the same time unicast worked fine.

The change:
This patch make the queue length adjustable to accommodate
for environments with very high multicast packet rate.
But still keeps the default value of 1000 unless specified.

The queue length is specified as a request per macvlan
using the IFLA_MACVLAN_BC_QUEUE_LEN parameter.

The actual used queue length will then be the maximum of
any macvlan connected to the same port. The actual used
queue length for the port can be retrieved (read only)
by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification.

This will be followed up by a patch to iproute2
in order to adjust the parameter from userspace.

Signed-off-by: Thomas Karlsson <thomas.karlsson@paneda.se>
Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/include/uapi/linux/if_link.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 781e482dc499..d208b2af697f 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -409,6 +409,8 @@ enum {
 	IFLA_MACVLAN_MACADDR,
 	IFLA_MACVLAN_MACADDR_DATA,
 	IFLA_MACVLAN_MACADDR_COUNT,
+	IFLA_MACVLAN_BC_QUEUE_LEN,
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
 	__IFLA_MACVLAN_MAX,
 };
 
-- 
cgit v1.2.3


From 0c55f867f0c96dff93d4e0b5973975d65afb26d8 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Wed, 2 Dec 2020 21:35:36 +0100
Subject: selftests: kvm/set_memory_region_test: Fix race in move region test

The current memory region move test correctly handles the situation that
the second (realigning) memslot move operation would temporarily trigger
MMIO until it completes, however it does not handle the case in which the
first (misaligning) move operation does this, too.
This results in false test assertions in case it does so.

Fix this by handling temporary MMIO from the first memslot move operation
in the test guest code, too.

Fixes: 8a0639fe9201 ("KVM: sefltests: Add explicit synchronization to move mem region test")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <0fdddb94bb0e31b7da129a809a308d91c10c0b5e.1606941224.git.maciej.szmigiero@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/set_memory_region_test.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index b3ece55a2da6..6f441dd9f33c 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -156,14 +156,23 @@ static void guest_code_move_memory_region(void)
 	GUEST_SYNC(0);
 
 	/*
-	 * Spin until the memory region is moved to a misaligned address.  This
-	 * may or may not trigger MMIO, as the window where the memslot is
-	 * invalid is quite small.
+	 * Spin until the memory region starts getting moved to a
+	 * misaligned address.
+	 * Every region move may or may not trigger MMIO, as the
+	 * window where the memslot is invalid is usually quite small.
 	 */
 	val = guest_spin_on_val(0);
 	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
 
-	/* Spin until the memory region is realigned. */
+	/* Spin until the misaligning memory region move completes. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 1 || val == 0, val);
+
+	/* Spin until the memory region starts to get re-aligned. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+	/* Spin until the re-aligning memory region move completes. */
 	val = guest_spin_on_val(MMIO_VAL);
 	GUEST_ASSERT_1(val == 1, val);
 
-- 
cgit v1.2.3


From 71ccb50074f31a50a1da4c1d8306d54da0907b00 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:41 -0800
Subject: tools/bpftool: Emit name <anon> for anonymous BTFs

For consistency of output, emit "name <anon>" for BTFs without the name. This
keeps output more consistent and obvious.

Suggested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-2-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index ed5e97157241..bd46af6a61cc 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -750,6 +750,8 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 		printf("name [%s]  ", name);
 	else if (name && name[0])
 		printf("name %s  ", name);
+	else
+		printf("name <anon>  ");
 	printf("size %uB", info->btf_size);
 
 	n = 0;
-- 
cgit v1.2.3


From 0cfdcd6378071f383c900e3d8862347e2af1d1ca Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:42 -0800
Subject: libbpf: Add base BTF accessor

Add ability to get base BTF. It can be also used to check if BTF is split BTF.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-3-andrii@kernel.org
---
 tools/lib/bpf/btf.c      | 5 +++++
 tools/lib/bpf/btf.h      | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 3 files changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8ff46cd30ca1..1935e83d309c 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -432,6 +432,11 @@ __u32 btf__get_nr_types(const struct btf *btf)
 	return btf->start_id + btf->nr_types - 1;
 }
 
+const struct btf *btf__base_btf(const struct btf *btf)
+{
+	return btf->base_btf;
+}
+
 /* internal helper returning non-const pointer to a type */
 static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1093f6fe6800..1237bcd1dd17 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -51,6 +51,7 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 					const char *type_name, __u32 kind);
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 						  __u32 id);
 LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 29ff4807b909..ed55498c4122 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -340,6 +340,7 @@ LIBBPF_0.2.0 {
 
 LIBBPF_0.3.0 {
 	global:
+		btf__base_btf;
 		btf__parse_elf_split;
 		btf__parse_raw_split;
 		btf__parse_split;
-- 
cgit v1.2.3


From fa4528379a51ff8d5271e1bcfa0d5ef71657869f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:43 -0800
Subject: tools/bpftool: Auto-detect split BTFs in common cases

In case of working with module's split BTF from /sys/kernel/btf/*,
auto-substitute /sys/kernel/btf/vmlinux as the base BTF. This makes using
bpftool with module BTFs faster and simpler.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-4-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index bd46af6a61cc..94cd3bad5430 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -357,11 +357,13 @@ static int dump_btf_raw(const struct btf *btf,
 			dump_btf_type(btf, root_type_ids[i], t);
 		}
 	} else {
+		const struct btf *base;
 		int cnt = btf__get_nr_types(btf);
 		int start_id = 1;
 
-		if (base_btf)
-			start_id = btf__get_nr_types(base_btf) + 1;
+		base = btf__base_btf(btf);
+		if (base)
+			start_id = btf__get_nr_types(base) + 1;
 
 		for (i = start_id; i <= cnt; i++) {
 			t = btf__type_by_id(btf, i);
@@ -428,7 +430,7 @@ done:
 
 static int do_dump(int argc, char **argv)
 {
-	struct btf *btf = NULL;
+	struct btf *btf = NULL, *base = NULL;
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
 	bool dump_c = false;
@@ -502,7 +504,21 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		btf = btf__parse_split(*argv, base_btf);
+		const char sysfs_prefix[] = "/sys/kernel/btf/";
+		const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux";
+
+		if (!base_btf &&
+		    strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 &&
+		    strcmp(*argv, sysfs_vmlinux) != 0) {
+			base = btf__parse(sysfs_vmlinux, NULL);
+			if (libbpf_get_error(base)) {
+				p_err("failed to parse vmlinux BTF at '%s': %ld\n",
+				      sysfs_vmlinux, libbpf_get_error(base));
+				base = NULL;
+			}
+		}
+
+		btf = btf__parse_split(*argv, base ?: base_btf);
 		if (IS_ERR(btf)) {
 			err = -PTR_ERR(btf);
 			btf = NULL;
@@ -567,6 +583,7 @@ static int do_dump(int argc, char **argv)
 done:
 	close(fd);
 	btf__free(btf);
+	btf__free(base);
 	return err;
 }
 
-- 
cgit v1.2.3


From a874c8c389a12b9f5ab67ba01995f06bf82e94fe Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:49:47 -0800
Subject: selftests/bpf: Copy file using read/write in local storage test

Splice (copy_file_range) doesn't work on all filesystems. I'm running
test kernels on top of my read-only disk image and it uses plan9 under the
hood. This prevents test_local_storage from successfully passing.

There is really no technical reason to use splice, so lets do
old-school read/write to copy file; this should work in all
environments.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202174947.3621989-1-sdf@google.com
---
 .../selftests/bpf/prog_tests/test_local_storage.c  | 28 ++++++++++++++--------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index fcca7ba1f368..c0fe73a17ed1 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -21,14 +21,6 @@ static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 	return syscall(__NR_pidfd_open, pid, flags);
 }
 
-static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
-				      loff_t *off_out, size_t len,
-				      unsigned int flags)
-{
-	return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out,
-		       len, flags);
-}
-
 static unsigned int duration;
 
 #define TEST_STORAGE_VALUE 0xbeefdead
@@ -47,6 +39,7 @@ static int copy_rm(char *dest)
 {
 	int fd_in, fd_out = -1, ret = 0;
 	struct stat stat;
+	char *buf = NULL;
 
 	fd_in = open("/bin/rm", O_RDONLY);
 	if (fd_in < 0)
@@ -64,18 +57,33 @@ static int copy_rm(char *dest)
 		goto out;
 	}
 
-	ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0);
-	if (ret == -1) {
+	buf = malloc(stat.st_blksize);
+	if (!buf) {
 		ret = -errno;
 		goto out;
 	}
 
+	while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) {
+		ret = write(fd_out, buf, ret);
+		if (ret < 0) {
+			ret = -errno;
+			goto out;
+
+		}
+	}
+	if (ret < 0) {
+		ret = -errno;
+		goto out;
+
+	}
+
 	/* Set executable permission on the copied file */
 	ret = chmod(dest, 0100);
 	if (ret == -1)
 		ret = -errno;
 
 out:
+	free(buf);
 	close(fd_in);
 	close(fd_out);
 	return ret;
-- 
cgit v1.2.3


From 22e8ebe35a2e30ee19e02c41cacc99c2f896bc4b Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Thu, 3 Dec 2020 10:22:34 +0000
Subject: tools/resolve_btfids: Fix some error messages

Add missing newlines and fix polarity of strerror argument.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20201203102234.648540-1-jackmanb@google.com
---
 tools/bpf/resolve_btfids/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index dfa540d8a02d..e3ea569ee125 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -454,7 +454,7 @@ static int symbols_collect(struct object *obj)
 			return -ENOMEM;
 
 		if (id->addr_cnt >= ADDR_CNT) {
-			pr_err("FAILED symbol %s crossed the number of allowed lists",
+			pr_err("FAILED symbol %s crossed the number of allowed lists\n",
 				id->name);
 			return -1;
 		}
@@ -477,8 +477,8 @@ static int symbols_resolve(struct object *obj)
 	btf = btf__parse(obj->btf ?: obj->path, NULL);
 	err = libbpf_get_error(btf);
 	if (err) {
-		pr_err("FAILED: load BTF from %s: %s",
-			obj->path, strerror(err));
+		pr_err("FAILED: load BTF from %s: %s\n",
+			obj->path, strerror(-err));
 		return -1;
 	}
 
-- 
cgit v1.2.3


From e459f49b4394e2630ea55d5ac7a49402686848fe Mon Sep 17 00:00:00 2001
From: Mariusz Dudek <mariuszx.dudek@intel.com>
Date: Thu, 3 Dec 2020 10:05:45 +0100
Subject: libbpf: Separate XDP program load with xsk socket creation

Add support for separation of eBPF program load and xsk socket
creation.

This is needed for use-case when you want to privide as little
privileges as possible to the data plane application that will
handle xsk socket creation and incoming traffic.

With this patch the data entity container can be run with only
CAP_NET_RAW capability to fulfill its purpose of creating xsk
socket and handling packages. In case your umem is larger or
equal process limit for MEMLOCK you need either increase the
limit or CAP_IPC_LOCK capability.

To resolve privileges issue two APIs are introduced:

- xsk_setup_xdp_prog - loads the built in XDP program. It can
also return xsks_map_fd which is needed by unprivileged process
to update xsks_map with AF_XDP socket "fd"

- xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap
for a particular xsk_socket

Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20201203090546.11976-2-mariuszx.dudek@intel.com
---
 tools/lib/bpf/libbpf.map |  2 ++
 tools/lib/bpf/xsk.c      | 92 +++++++++++++++++++++++++++++++++++++++++++-----
 tools/lib/bpf/xsk.h      |  5 +++
 3 files changed, 90 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index ed55498c4122..7c4126542e2b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -346,4 +346,6 @@ LIBBPF_0.3.0 {
 		btf__parse_split;
 		btf__new_empty_split;
 		btf__new_split;
+		xsk_setup_xdp_prog;
+		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 9bc537d0b92d..4b051ec7cfbb 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -566,8 +566,35 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 				   &xsk->fd, 0);
 }
 
-static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 {
+	char ifname[IFNAMSIZ];
+	struct xsk_ctx *ctx;
+	char *interface;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return -ENOMEM;
+
+	interface = if_indextoname(ifindex, &ifname[0]);
+	if (!interface) {
+		free(ctx);
+		return -errno;
+	}
+
+	ctx->ifindex = ifindex;
+	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = 0;
+
+	xsk->ctx = ctx;
+
+	return 0;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
+				int *xsks_map_fd)
+{
+	struct xsk_socket *xsk = _xdp;
 	struct xsk_ctx *ctx = xsk->ctx;
 	__u32 prog_id = 0;
 	int err;
@@ -584,8 +611,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 
 		err = xsk_load_xdp_prog(xsk);
 		if (err) {
-			xsk_delete_bpf_maps(xsk);
-			return err;
+			goto err_load_xdp_prog;
 		}
 	} else {
 		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
@@ -598,15 +624,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		}
 	}
 
-	if (xsk->rx)
+	if (xsk->rx) {
 		err = xsk_set_bpf_maps(xsk);
-	if (err) {
-		xsk_delete_bpf_maps(xsk);
-		close(ctx->prog_fd);
-		return err;
+		if (err) {
+			if (!prog_id) {
+				goto err_set_bpf_maps;
+			} else {
+				close(ctx->prog_fd);
+				return err;
+			}
+		}
 	}
+	if (xsks_map_fd)
+		*xsks_map_fd = ctx->xsks_map_fd;
 
 	return 0;
+
+err_set_bpf_maps:
+	close(ctx->prog_fd);
+	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_load_xdp_prog:
+	xsk_delete_bpf_maps(xsk);
+
+	return err;
 }
 
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
@@ -689,6 +729,40 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	return ctx;
 }
 
+static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
+{
+	free(xsk->ctx);
+	free(xsk);
+}
+
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
+{
+	xsk->ctx->xsks_map_fd = fd;
+	return xsk_set_bpf_maps(xsk);
+}
+
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
+{
+	struct xsk_socket *xsk;
+	int res;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	res = xsk_create_xsk_struct(ifindex, xsk);
+	if (res) {
+		free(xsk);
+		return -EINVAL;
+	}
+
+	res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+
+	xsk_destroy_xsk_struct(xsk);
+
+	return res;
+}
+
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      const char *ifname,
 			      __u32 queue_id, struct xsk_umem *umem,
@@ -838,7 +912,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	ctx->prog_fd = -1;
 
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
-		err = xsk_setup_xdp_prog(xsk);
+		err = __xsk_setup_xdp_prog(xsk, NULL);
 		if (err)
 			goto out_mmap_tx;
 	}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 5865e082ba0b..e9f121f5d129 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -204,6 +204,11 @@ struct xsk_umem_config {
 	__u32 flags;
 };
 
+LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
+				  int *xsks_map_fd);
+LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk,
+					 int xsks_map_fd);
+
 /* Flags for the libbpf_flags field. */
 #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
 
-- 
cgit v1.2.3


From 3db980449bc3b9765c78210787bcbf4305636982 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:34 +0000
Subject: selftests/bpf: Update ima_setup.sh for busybox

losetup on busybox does not output the name of loop device on using
-f with --show. It also doesn't support -j to find the loop devices
for a given backing file. losetup is updated to use "-a" which is
available on busybox.

blkid does not support options (-s and -o) to only display the uuid, so
parse the output instead.

Not all environments have mkfs.ext4, the test requires a loop device
with a backing image file which could formatted with any filesystem.
Update to using mkfs.ext2 which is available on busybox.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-2-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 15490ccc5e55..137f2d32598f 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -3,6 +3,7 @@
 
 set -e
 set -u
+set -o pipefail
 
 IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
 TEST_BINARY="/bin/true"
@@ -23,13 +24,15 @@ setup()
 
         dd if=/dev/zero of="${mount_img}" bs=1M count=10
 
-        local loop_device="$(losetup --find --show ${mount_img})"
+        losetup -f "${mount_img}"
+        local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        mkfs.ext4 "${loop_device}"
+        mkfs.ext2 "${loop_device:?}"
         mount "${loop_device}" "${mount_dir}"
 
         cp "${TEST_BINARY}" "${mount_dir}"
-        local mount_uuid="$(blkid -s UUID -o value ${loop_device})"
+        local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
+
         echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
@@ -38,7 +41,8 @@ cleanup() {
         local mount_img="${tmp_dir}/test.img"
         local mount_dir="${tmp_dir}/mnt"
 
-        local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings)
+        local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+
         for loop_dev in "${loop_devices}"; do
                 losetup -d $loop_dev
         done
-- 
cgit v1.2.3


From 1ee076719d4e14c005f375c50731ed44eb48fee4 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:35 +0000
Subject: selftests/bpf: Ensure securityfs mount before writing ima policy

SecurityFS may not be mounted even if it is enabled in the kernel
config. So, check if the mount exists in /proc/mounts by parsing the
file and, if not, mount it on /sys/kernel/security.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-3-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 137f2d32598f..b1ee4bf06996 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -14,6 +14,20 @@ usage()
         exit 1
 }
 
+ensure_mount_securityfs()
+{
+        local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
+
+        if [ -z "${securityfs_dir}" ]; then
+                securityfs_dir=/sys/kernel/security
+                mount -t securityfs security "${securityfs_dir}"
+        fi
+
+        if [ ! -d "${securityfs_dir}" ]; then
+                echo "${securityfs_dir}: securityfs is not mounted" && exit 1
+        fi
+}
+
 setup()
 {
         local tmp_dir="$1"
@@ -33,6 +47,7 @@ setup()
         cp "${TEST_BINARY}" "${mount_dir}"
         local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
 
+        ensure_mount_securityfs
         echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
-- 
cgit v1.2.3


From d932e043b9d6d60113e90267ae2fbe4e946d7b08 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:36 +0000
Subject: selftests/bpf: Add config dependency on BLK_DEV_LOOP

The ima selftest restricts its scope to a test filesystem image
mounted on a loop device and prevents permanent ima policy changes for
the whole system.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-4-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 365bf9771b07..37e1f303fc11 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -43,3 +43,4 @@ CONFIG_IMA=y
 CONFIG_SECURITYFS=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_READ_POLICY=y
+CONFIG_BLK_DEV_LOOP=y
-- 
cgit v1.2.3


From ffebecd9d49542046c5ecbb410af01e016636e19 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:37 +0000
Subject: selftests/bpf: Indent ima_setup.sh with tabs.

The file was formatted with spaces instead of tabs and went unnoticed
as checkpatch.pl did not complain (probably because this is a shell
script). Re-indent it with tabs to be consistent with other scripts.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-5-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 108 +++++++++++++++----------------
 1 file changed, 54 insertions(+), 54 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index b1ee4bf06996..2bfc646bc230 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -10,90 +10,90 @@ TEST_BINARY="/bin/true"
 
 usage()
 {
-        echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
-        exit 1
+	echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+	exit 1
 }
 
 ensure_mount_securityfs()
 {
-        local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
+	local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
 
-        if [ -z "${securityfs_dir}" ]; then
-                securityfs_dir=/sys/kernel/security
-                mount -t securityfs security "${securityfs_dir}"
-        fi
+	if [ -z "${securityfs_dir}" ]; then
+		securityfs_dir=/sys/kernel/security
+		mount -t securityfs security "${securityfs_dir}"
+	fi
 
-        if [ ! -d "${securityfs_dir}" ]; then
-                echo "${securityfs_dir}: securityfs is not mounted" && exit 1
-        fi
+	if [ ! -d "${securityfs_dir}" ]; then
+		echo "${securityfs_dir}: securityfs is not mounted" && exit 1
+	fi
 }
 
 setup()
 {
-        local tmp_dir="$1"
-        local mount_img="${tmp_dir}/test.img"
-        local mount_dir="${tmp_dir}/mnt"
-        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
-        mkdir -p ${mount_dir}
+	local tmp_dir="$1"
+	local mount_img="${tmp_dir}/test.img"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+	mkdir -p ${mount_dir}
 
-        dd if=/dev/zero of="${mount_img}" bs=1M count=10
+	dd if=/dev/zero of="${mount_img}" bs=1M count=10
 
-        losetup -f "${mount_img}"
-        local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+	losetup -f "${mount_img}"
+	local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        mkfs.ext2 "${loop_device:?}"
-        mount "${loop_device}" "${mount_dir}"
+	mkfs.ext2 "${loop_device:?}"
+	mount "${loop_device}" "${mount_dir}"
 
-        cp "${TEST_BINARY}" "${mount_dir}"
-        local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
+	cp "${TEST_BINARY}" "${mount_dir}"
+	local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
 
-        ensure_mount_securityfs
-        echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+	ensure_mount_securityfs
+	echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
 cleanup() {
-        local tmp_dir="$1"
-        local mount_img="${tmp_dir}/test.img"
-        local mount_dir="${tmp_dir}/mnt"
+	local tmp_dir="$1"
+	local mount_img="${tmp_dir}/test.img"
+	local mount_dir="${tmp_dir}/mnt"
 
-        local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+	local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        for loop_dev in "${loop_devices}"; do
-                losetup -d $loop_dev
-        done
+	for loop_dev in "${loop_devices}"; do
+		losetup -d $loop_dev
+	done
 
-        umount ${mount_dir}
-        rm -rf ${tmp_dir}
+	umount ${mount_dir}
+	rm -rf ${tmp_dir}
 }
 
 run()
 {
-        local tmp_dir="$1"
-        local mount_dir="${tmp_dir}/mnt"
-        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+	local tmp_dir="$1"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
 
-        exec "${copied_bin_path}"
+	exec "${copied_bin_path}"
 }
 
 main()
 {
-        [[ $# -ne 2 ]] && usage
-
-        local action="$1"
-        local tmp_dir="$2"
-
-        [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
-
-        if [[ "${action}" == "setup" ]]; then
-                setup "${tmp_dir}"
-        elif [[ "${action}" == "cleanup" ]]; then
-                cleanup "${tmp_dir}"
-        elif [[ "${action}" == "run" ]]; then
-                run "${tmp_dir}"
-        else
-                echo "Unknown action: ${action}"
-                exit 1
-        fi
+	[[ $# -ne 2 ]] && usage
+
+	local action="$1"
+	local tmp_dir="$2"
+
+	[[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+	if [[ "${action}" == "setup" ]]; then
+		setup "${tmp_dir}"
+	elif [[ "${action}" == "cleanup" ]]; then
+		cleanup "${tmp_dir}"
+	elif [[ "${action}" == "run" ]]; then
+		run "${tmp_dir}"
+	else
+		echo "Unknown action: ${action}"
+		exit 1
+	fi
 }
 
 main "$@"
-- 
cgit v1.2.3


From 80b2b5c3a701d56de98d00d99bc9cc384fb316d9 Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Wed, 2 Dec 2020 23:34:10 -0500
Subject: libbpf: Fail early when loading programs with unspecified type

Before this patch, a program with unspecified type
(BPF_PROG_TYPE_UNSPEC) would be passed to the BPF syscall, only to have
the kernel reject it with an opaque invalid argument error. This patch
makes libbpf reject such programs with a nicer error message - in
particular libbpf now tries to diagnose bad ELF section names at both
open time and load time.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203043410.59699-1-andreimatei1@gmail.com
---
 tools/lib/bpf/libbpf.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 313034117070..d6f45538444d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6629,6 +6629,16 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	char *log_buf = NULL;
 	int btf_fd, ret;
 
+	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+		/*
+		 * The program type must be set.  Most likely we couldn't find a proper
+		 * section definition at load time, and thus we didn't infer the type.
+		 */
+		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
+			prog->name, prog->sec_name);
+		return -EINVAL;
+	}
+
 	if (!insns || !insns_cnt)
 		return -EINVAL;
 
@@ -6920,9 +6930,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 
 	bpf_object__for_each_program(prog, obj) {
 		prog->sec_def = find_sec_def(prog->sec_name);
-		if (!prog->sec_def)
+		if (!prog->sec_def) {
 			/* couldn't guess, but user might manually specify */
+			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
+				prog->name, prog->sec_name);
 			continue;
+		}
 
 		if (prog->sec_def->is_sleepable)
 			prog->prog_flags |= BPF_F_SLEEPABLE;
-- 
cgit v1.2.3


From 9cf309c56f7910a81fbe053b6f11c3b1f0987b12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 3 Dec 2020 10:33:06 +0100
Subject: libbpf: Sanitise map names before pinning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we added sanitising of map names before loading programs to libbpf, we
still allowed periods in the name. While the kernel will accept these for
the map names themselves, they are not allowed in file names when pinning
maps. This means that bpf_object__pin_maps() will fail if called on an
object that contains internal maps (such as sections .rodata).

Fix this by replacing periods with underscores when constructing map pin
paths. This only affects the paths generated by libbpf when
bpf_object__pin_maps() is called with a path argument. Any pin paths set
by bpf_map__set_pin_path() are unaffected, and it will still be up to the
caller to avoid invalid characters in those.

Fixes: 113e6b7e15e2 ("libbpf: Sanitise internal map names so they are not rejected by the kernel")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203093306.107676-1-toke@redhat.com
---
 tools/lib/bpf/libbpf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d6f45538444d..b2e16efabde0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7659,6 +7659,16 @@ bool bpf_map__is_pinned(const struct bpf_map *map)
 	return map->pinned;
 }
 
+static void sanitize_pin_path(char *s)
+{
+	/* bpffs disallows periods in path names */
+	while (*s) {
+		if (*s == '.')
+			*s = '_';
+		s++;
+	}
+}
+
 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 {
 	struct bpf_map *map;
@@ -7688,6 +7698,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 				err = -ENAMETOOLONG;
 				goto err_unpin_maps;
 			}
+			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
 			continue;
@@ -7732,6 +7743,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 				return -EINVAL;
 			else if (len >= PATH_MAX)
 				return -ENAMETOOLONG;
+			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
 			continue;
-- 
cgit v1.2.3


From d6d418bd8f92aaa4c7c26d606188147c2ee0dae9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 15:13:32 -0800
Subject: libbpf: Cap retries in sys_bpf_prog_load

I've seen a situation, where a process that's under pprof constantly
generates SIGPROF which prevents program loading indefinitely.
The right thing to do probably is to disable signals in the upper
layers while loading, but it still would be nice to get some error from
libbpf instead of an endless loop.

Let's add some small retry limit to the program loading:
try loading the program 5 (arbitrary) times and give up.

v2:
* 10 -> 5 retires (Andrii Nakryiko)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202231332.3923644-1-sdf@google.com
---
 tools/lib/bpf/bpf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d27e34133973..4025266d0fb0 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -67,11 +67,12 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 
 static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 {
+	int retries = 5;
 	int fd;
 
 	do {
 		fd = sys_bpf(BPF_PROG_LOAD, attr, size);
-	} while (fd < 0 && errno == EAGAIN);
+	} while (fd < 0 && errno == EAGAIN && retries-- > 0);
 
 	return fd;
 }
-- 
cgit v1.2.3


From 58c185b85d0c1753b0b6a9390294bd883faf4d77 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Thu, 3 Dec 2020 12:08:50 +0000
Subject: bpf: Fix cold build of test_progs-no_alu32

This object lives inside the trunner output dir,
i.e. tools/testing/selftests/bpf/no_alu32/btf_data.o

At some point it gets copied into the parent directory during another
part of the build, but that doesn't happen when building
test_progs-no_alu32 from clean.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20201203120850.859170-1-jackmanb@google.com
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 894192c319fb..371b022d932c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -378,7 +378,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
+	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
 
 endef
 
-- 
cgit v1.2.3


From 55144f31f0d2fdd3e74ead67f1649bf577961eaa Mon Sep 17 00:00:00 2001
From: Prankur gupta <prankgup@fb.com>
Date: Wed, 2 Dec 2020 13:31:52 -0800
Subject: selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP

Adding selftests for new added functionality to set TCP_WINDOW_CLAMP
from bpf setsockopt.

Signed-off-by: Prankur gupta <prankgup@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202213152.435886-3-prankgup@fb.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h      |  1 +
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c |  4 +++
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 33 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf.h          |  2 ++
 4 files changed, 40 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 2915664c335d..6a9053162cf2 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -56,6 +56,7 @@ struct tcp_sock {
 	__u32	rcv_nxt;
 	__u32	snd_nxt;
 	__u32	snd_una;
+	__u32	window_clamp;
 	__u8	ecn_flags;
 	__u32	delivered;
 	__u32	delivered_ce;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index ab5281475f44..87923d2865b7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -42,6 +42,10 @@ static void verify_result(struct tcpbpf_globals *result)
 
 	/* check getsockopt for SAVED_SYN */
 	ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
+
+	/* check getsockopt for window_clamp */
+	ASSERT_EQ(result->window_clamp_client, 9216, "window_clamp_client");
+	ASSERT_EQ(result->window_clamp_server, 9216, "window_clamp_server");
 }
 
 static void run_test(struct tcpbpf_globals *result)
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index e85e49deba70..94f50f7e94d6 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -12,17 +12,41 @@
 #include <linux/tcp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
 #include "test_tcpbpf.h"
 
 struct tcpbpf_globals global = {};
 int _version SEC("version") = 1;
 
+/**
+ * SOL_TCP is defined in <netinet/tcp.h> while
+ * TCP_SAVED_SYN is defined in already included <linux/tcp.h>
+ */
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+static __always_inline int get_tp_window_clamp(struct bpf_sock_ops *skops)
+{
+	struct bpf_sock *sk;
+	struct tcp_sock *tp;
+
+	sk = skops->sk;
+	if (!sk)
+		return -1;
+	tp = bpf_skc_to_tcp_sock(sk);
+	if (!tp)
+		return -1;
+	return tp->window_clamp;
+}
+
 SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
 	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
 	struct bpf_sock_ops *reuse = skops;
 	struct tcphdr *thdr;
+	int window_clamp = 9216;
 	int good_call_rv = 0;
 	int bad_call_rv = 0;
 	int save_syn = 1;
@@ -75,6 +99,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 	global.event_map |= (1 << op);
 
 	switch (op) {
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+				    &window_clamp, sizeof(window_clamp));
+		global.window_clamp_client = get_tp_window_clamp(skops);
+		break;
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 		/* Test failure to set largest cb flag (assumes not defined) */
 		global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
@@ -100,6 +129,10 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 				global.tcp_saved_syn = v;
 			}
 		}
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+				    &window_clamp, sizeof(window_clamp));
+
+		global.window_clamp_server = get_tp_window_clamp(skops);
 		break;
 	case BPF_SOCK_OPS_RTO_CB:
 		break;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 0ed33521cbbb..9dd9b5590f9d 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -16,5 +16,7 @@ struct tcpbpf_globals {
 	__u32 num_close_events;
 	__u32 tcp_save_syn;
 	__u32 tcp_saved_syn;
+	__u32 window_clamp_client;
+	__u32 window_clamp_server;
 };
 #endif
-- 
cgit v1.2.3


From a19f93cfafdf85851c69bc9f677aa4f40c53610f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:23 -0800
Subject: libbpf: Add internal helper to load BTF data by FD

Add a btf_get_from_fd() helper, which constructs struct btf from in-kernel BTF
data by FD. This is used for loading module BTFs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-4-andrii@kernel.org
---
 tools/lib/bpf/btf.c             | 61 +++++++++++++++++++++++------------------
 tools/lib/bpf/libbpf_internal.h |  1 +
 2 files changed, 36 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1935e83d309c..3c3f2bc6c652 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1323,35 +1323,27 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 	return btf__str_by_offset(btf, offset);
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
 {
-	struct bpf_btf_info btf_info = { 0 };
+	struct bpf_btf_info btf_info;
 	__u32 len = sizeof(btf_info);
 	__u32 last_size;
-	int btf_fd;
+	struct btf *btf;
 	void *ptr;
 	int err;
 
-	err = 0;
-	*btf = NULL;
-	btf_fd = bpf_btf_get_fd_by_id(id);
-	if (btf_fd < 0)
-		return 0;
-
 	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
 	 * let's start with a sane default - 4KiB here - and resize it only if
 	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
 	 */
-	btf_info.btf_size = 4096;
-	last_size = btf_info.btf_size;
+	last_size = 4096;
 	ptr = malloc(last_size);
-	if (!ptr) {
-		err = -ENOMEM;
-		goto exit_free;
-	}
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
 
-	memset(ptr, 0, last_size);
+	memset(&btf_info, 0, sizeof(btf_info));
 	btf_info.btf = ptr_to_u64(ptr);
+	btf_info.btf_size = last_size;
 	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 
 	if (!err && btf_info.btf_size > last_size) {
@@ -1360,31 +1352,48 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 		last_size = btf_info.btf_size;
 		temp_ptr = realloc(ptr, last_size);
 		if (!temp_ptr) {
-			err = -ENOMEM;
+			btf = ERR_PTR(-ENOMEM);
 			goto exit_free;
 		}
 		ptr = temp_ptr;
-		memset(ptr, 0, last_size);
+
+		len = sizeof(btf_info);
+		memset(&btf_info, 0, sizeof(btf_info));
 		btf_info.btf = ptr_to_u64(ptr);
+		btf_info.btf_size = last_size;
+
 		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 	}
 
 	if (err || btf_info.btf_size > last_size) {
-		err = errno;
+		btf = err ? ERR_PTR(-errno) : ERR_PTR(-E2BIG);
 		goto exit_free;
 	}
 
-	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-	}
+	btf = btf_new(ptr, btf_info.btf_size, base_btf);
 
 exit_free:
-	close(btf_fd);
 	free(ptr);
+	return btf;
+}
 
-	return err;
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+	struct btf *res;
+	int btf_fd;
+
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (btf_fd < 0)
+		return -errno;
+
+	res = btf_get_from_fd(btf_fd, NULL);
+	close(btf_fd);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	*btf = res;
+	return 0;
 }
 
 int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index d99bc847bf84..e569ae63808e 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -155,6 +155,7 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 				__u32 *off);
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
 
 struct btf_ext_info {
 	/*
-- 
cgit v1.2.3


From 0f7515ca7cddadabe04e28e20a257b1bbb6cb98a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:24 -0800
Subject: libbpf: Refactor CO-RE relocs to not assume a single BTF object

Refactor CO-RE relocation candidate search to not expect a single BTF, rather
return all candidate types with their corresponding BTF objects. This will
allow to extend CO-RE relocations to accommodate kernel module BTFs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-5-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 187 +++++++++++++++++++++++++++++--------------------
 1 file changed, 111 insertions(+), 76 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b2e16efabde0..2e4fa3ce6b94 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -462,11 +462,14 @@ struct bpf_object {
 	struct list_head list;
 
 	struct btf *btf;
+	struct btf_ext *btf_ext;
+
 	/* Parse and load BTF vmlinux if any of the programs in the object need
 	 * it at load time.
 	 */
 	struct btf *btf_vmlinux;
-	struct btf_ext *btf_ext;
+	/* vmlinux BTF override for CO-RE relocations */
+	struct btf *btf_vmlinux_override;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -4600,46 +4603,43 @@ static size_t bpf_core_essential_name_len(const char *name)
 	return n;
 }
 
-/* dynamically sized list of type IDs */
-struct ids_vec {
-	__u32 *data;
+struct core_cand
+{
+	const struct btf *btf;
+	const struct btf_type *t;
+	const char *name;
+	__u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct core_cand_list {
+	struct core_cand *cands;
 	int len;
 };
 
-static void bpf_core_free_cands(struct ids_vec *cand_ids)
+static void bpf_core_free_cands(struct core_cand_list *cands)
 {
-	free(cand_ids->data);
-	free(cand_ids);
+	free(cands->cands);
+	free(cands);
 }
 
-static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
-					   __u32 local_type_id,
-					   const struct btf *targ_btf)
+static int bpf_core_add_cands(struct core_cand *local_cand,
+			      size_t local_essent_len,
+			      const struct btf *targ_btf,
+			      const char *targ_btf_name,
+			      int targ_start_id,
+			      struct core_cand_list *cands)
 {
-	size_t local_essent_len, targ_essent_len;
-	const char *local_name, *targ_name;
-	const struct btf_type *t, *local_t;
-	struct ids_vec *cand_ids;
-	__u32 *new_ids;
-	int i, err, n;
-
-	local_t = btf__type_by_id(local_btf, local_type_id);
-	if (!local_t)
-		return ERR_PTR(-EINVAL);
-
-	local_name = btf__name_by_offset(local_btf, local_t->name_off);
-	if (str_is_empty(local_name))
-		return ERR_PTR(-EINVAL);
-	local_essent_len = bpf_core_essential_name_len(local_name);
-
-	cand_ids = calloc(1, sizeof(*cand_ids));
-	if (!cand_ids)
-		return ERR_PTR(-ENOMEM);
+	struct core_cand *new_cands, *cand;
+	const struct btf_type *t;
+	const char *targ_name;
+	size_t targ_essent_len;
+	int n, i;
 
 	n = btf__get_nr_types(targ_btf);
-	for (i = 1; i <= n; i++) {
+	for (i = targ_start_id; i <= n; i++) {
 		t = btf__type_by_id(targ_btf, i);
-		if (btf_kind(t) != btf_kind(local_t))
+		if (btf_kind(t) != btf_kind(local_cand->t))
 			continue;
 
 		targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -4650,25 +4650,62 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 		if (targ_essent_len != local_essent_len)
 			continue;
 
-		if (strncmp(local_name, targ_name, local_essent_len) == 0) {
-			pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
-				 local_type_id, btf_kind_str(local_t),
-				 local_name, i, btf_kind_str(t), targ_name);
-			new_ids = libbpf_reallocarray(cand_ids->data,
-						      cand_ids->len + 1,
-						      sizeof(*cand_ids->data));
-			if (!new_ids) {
-				err = -ENOMEM;
-				goto err_out;
-			}
-			cand_ids->data = new_ids;
-			cand_ids->data[cand_ids->len++] = i;
-		}
+		if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
+			continue;
+
+		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
+			 local_cand->id, btf_kind_str(local_cand->t),
+			 local_cand->name, i, btf_kind_str(t), targ_name,
+			 targ_btf_name);
+		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
+					      sizeof(*cands->cands));
+		if (!new_cands)
+			return -ENOMEM;
+
+		cand = &new_cands[cands->len];
+		cand->btf = targ_btf;
+		cand->t = t;
+		cand->name = targ_name;
+		cand->id = i;
+
+		cands->cands = new_cands;
+		cands->len++;
 	}
-	return cand_ids;
-err_out:
-	bpf_core_free_cands(cand_ids);
-	return ERR_PTR(err);
+	return 0;
+}
+
+static struct core_cand_list *
+bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
+{
+	struct core_cand local_cand = {};
+	struct core_cand_list *cands;
+	size_t local_essent_len;
+	int err;
+
+	local_cand.btf = local_btf;
+	local_cand.t = btf__type_by_id(local_btf, local_type_id);
+	if (!local_cand.t)
+		return ERR_PTR(-EINVAL);
+
+	local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
+	if (str_is_empty(local_cand.name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(local_cand.name);
+
+	cands = calloc(1, sizeof(*cands));
+	if (!cands)
+		return ERR_PTR(-ENOMEM);
+
+	/* Attempt to find target candidates in vmlinux BTF first */
+	err = bpf_core_add_cands(&local_cand, local_essent_len,
+				 obj->btf_vmlinux_override ?: obj->btf_vmlinux,
+				 "vmlinux", 1, cands);
+	if (err) {
+		bpf_core_free_cands(cands);
+		return ERR_PTR(err);
+	}
+
+	return cands;
 }
 
 /* Check two types for compatibility for the purpose of field access
@@ -5661,7 +5698,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 			       const struct bpf_core_relo *relo,
 			       int relo_idx,
 			       const struct btf *local_btf,
-			       const struct btf *targ_btf,
 			       struct hashmap *cand_cache)
 {
 	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
@@ -5669,8 +5705,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	struct bpf_core_relo_res cand_res, targ_res;
 	const struct btf_type *local_type;
 	const char *local_name;
-	struct ids_vec *cand_ids;
-	__u32 local_id, cand_id;
+	struct core_cand_list *cands = NULL;
+	__u32 local_id;
 	const char *spec_str;
 	int i, j, err;
 
@@ -5717,24 +5753,24 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 		return -EOPNOTSUPP;
 	}
 
-	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
-		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
-		if (IS_ERR(cand_ids)) {
+	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
+		if (IS_ERR(cands)) {
 			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
 				prog->name, relo_idx, local_id, btf_kind_str(local_type),
-				local_name, PTR_ERR(cand_ids));
-			return PTR_ERR(cand_ids);
+				local_name, PTR_ERR(cands));
+			return PTR_ERR(cands);
 		}
-		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+		err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
 		if (err) {
-			bpf_core_free_cands(cand_ids);
+			bpf_core_free_cands(cands);
 			return err;
 		}
 	}
 
-	for (i = 0, j = 0; i < cand_ids->len; i++) {
-		cand_id = cand_ids->data[i];
-		err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
+	for (i = 0, j = 0; i < cands->len; i++) {
+		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+					  cands->cands[i].id, &cand_spec);
 		if (err < 0) {
 			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
 				prog->name, relo_idx, i);
@@ -5778,7 +5814,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 			return -EINVAL;
 		}
 
-		cand_ids->data[j++] = cand_spec.root_type_id;
+		cands->cands[j++] = cands->cands[i];
 	}
 
 	/*
@@ -5790,7 +5826,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	 * depending on relo's kind.
 	 */
 	if (j > 0)
-		cand_ids->len = j;
+		cands->len = j;
 
 	/*
 	 * If no candidates were found, it might be both a programmer error,
@@ -5834,20 +5870,19 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	struct hashmap_entry *entry;
 	struct hashmap *cand_cache = NULL;
 	struct bpf_program *prog;
-	struct btf *targ_btf;
 	const char *sec_name;
 	int i, err = 0, insn_idx, sec_idx;
 
 	if (obj->btf_ext->core_relo_info.len == 0)
 		return 0;
 
-	if (targ_btf_path)
-		targ_btf = btf__parse(targ_btf_path, NULL);
-	else
-		targ_btf = obj->btf_vmlinux;
-	if (IS_ERR_OR_NULL(targ_btf)) {
-		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
-		return PTR_ERR(targ_btf);
+	if (targ_btf_path) {
+		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
+		if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
+			err = PTR_ERR(obj->btf_vmlinux_override);
+			pr_warn("failed to parse target BTF: %d\n", err);
+			return err;
+		}
 	}
 
 	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
@@ -5899,8 +5934,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 			if (!prog->load)
 				continue;
 
-			err = bpf_core_apply_relo(prog, rec, i, obj->btf,
-						  targ_btf, cand_cache);
+			err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
 			if (err) {
 				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
 					prog->name, i, err);
@@ -5911,8 +5945,9 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 
 out:
 	/* obj->btf_vmlinux is freed at the end of object load phase */
-	if (targ_btf != obj->btf_vmlinux)
-		btf__free(targ_btf);
+	btf__free(obj->btf_vmlinux_override);
+	obj->btf_vmlinux_override = NULL;
+
 	if (!IS_ERR_OR_NULL(cand_cache)) {
 		hashmap__for_each_entry(cand_cache, entry, i) {
 			bpf_core_free_cands(entry->value);
-- 
cgit v1.2.3


From 4f33a53d56000cfa67e2e4e8a5dac08f084a979b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:25 -0800
Subject: libbpf: Add kernel module BTF support for CO-RE relocations

Teach libbpf to search for candidate types for CO-RE relocations across kernel
modules BTFs, in addition to vmlinux BTF. If at least one candidate type is
found in vmlinux BTF, kernel module BTFs are not iterated. If vmlinux BTF has
no matching candidates, then find all kernel module BTFs and search for all
matching candidates across all of them.

Kernel's support for module BTFs are inferred from the support for BTF name
pointer in BPF UAPI.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-6-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 169 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2e4fa3ce6b94..ca20e493726d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -176,6 +176,8 @@ enum kern_feature_id {
 	FEAT_PROBE_READ_KERN,
 	/* BPF_PROG_BIND_MAP is supported */
 	FEAT_PROG_BIND_MAP,
+	/* Kernel support for module BTFs */
+	FEAT_MODULE_BTF,
 	__FEAT_CNT,
 };
 
@@ -402,6 +404,12 @@ struct extern_desc {
 
 static LIST_HEAD(bpf_objects_list);
 
+struct module_btf {
+	struct btf *btf;
+	char *name;
+	__u32 id;
+};
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -470,6 +478,11 @@ struct bpf_object {
 	struct btf *btf_vmlinux;
 	/* vmlinux BTF override for CO-RE relocations */
 	struct btf *btf_vmlinux_override;
+	/* Lazily initialized kernel module BTFs */
+	struct module_btf *btf_modules;
+	bool btf_modules_loaded;
+	size_t btf_module_cnt;
+	size_t btf_module_cap;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -3960,6 +3973,35 @@ static int probe_prog_bind_map(void)
 	return ret >= 0;
 }
 
+static int probe_module_btf(void)
+{
+	static const char strs[] = "\0int";
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+	};
+	struct bpf_btf_info info;
+	__u32 len = sizeof(info);
+	char name[16];
+	int fd, err;
+
+	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
+	if (fd < 0)
+		return 0; /* BTF not supported at all */
+
+	memset(&info, 0, sizeof(info));
+	info.name = ptr_to_u64(name);
+	info.name_len = sizeof(name);
+
+	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+	 * kernel's module BTF support coincides with support for
+	 * name/name_len fields in struct bpf_btf_info.
+	 */
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	close(fd);
+	return !err;
+}
+
 enum kern_feature_result {
 	FEAT_UNKNOWN = 0,
 	FEAT_SUPPORTED = 1,
@@ -4003,7 +4045,10 @@ static struct kern_feature_desc {
 	},
 	[FEAT_PROG_BIND_MAP] = {
 		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
-	}
+	},
+	[FEAT_MODULE_BTF] = {
+		"module BTF support", probe_module_btf,
+	},
 };
 
 static bool kernel_supports(enum kern_feature_id feat_id)
@@ -4674,13 +4719,96 @@ static int bpf_core_add_cands(struct core_cand *local_cand,
 	return 0;
 }
 
+static int load_module_btfs(struct bpf_object *obj)
+{
+	struct bpf_btf_info info;
+	struct module_btf *mod_btf;
+	struct btf *btf;
+	char name[64];
+	__u32 id = 0, len;
+	int err, fd;
+
+	if (obj->btf_modules_loaded)
+		return 0;
+
+	/* don't do this again, even if we find no module BTFs */
+	obj->btf_modules_loaded = true;
+
+	/* kernel too old to support module BTFs */
+	if (!kernel_supports(FEAT_MODULE_BTF))
+		return 0;
+
+	while (true) {
+		err = bpf_btf_get_next_id(id, &id);
+		if (err && errno == ENOENT)
+			return 0;
+		if (err) {
+			err = -errno;
+			pr_warn("failed to iterate BTF objects: %d\n", err);
+			return err;
+		}
+
+		fd = bpf_btf_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue; /* expected race: BTF was unloaded */
+			err = -errno;
+			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
+			return err;
+		}
+
+		len = sizeof(info);
+		memset(&info, 0, sizeof(info));
+		info.name = ptr_to_u64(name);
+		info.name_len = sizeof(name);
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			err = -errno;
+			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
+			close(fd);
+			return err;
+		}
+
+		/* ignore non-module BTFs */
+		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
+			close(fd);
+			continue;
+		}
+
+		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
+		close(fd);
+		if (IS_ERR(btf)) {
+			pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
+				name, id, PTR_ERR(btf));
+			return PTR_ERR(btf);
+		}
+
+		err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+				     sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+		if (err)
+			return err;
+
+		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
+
+		mod_btf->btf = btf;
+		mod_btf->id = id;
+		mod_btf->name = strdup(name);
+		if (!mod_btf->name)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static struct core_cand_list *
 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 {
 	struct core_cand local_cand = {};
 	struct core_cand_list *cands;
+	const struct btf *main_btf;
 	size_t local_essent_len;
-	int err;
+	int err, i;
 
 	local_cand.btf = local_btf;
 	local_cand.t = btf__type_by_id(local_btf, local_type_id);
@@ -4697,15 +4825,38 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l
 		return ERR_PTR(-ENOMEM);
 
 	/* Attempt to find target candidates in vmlinux BTF first */
-	err = bpf_core_add_cands(&local_cand, local_essent_len,
-				 obj->btf_vmlinux_override ?: obj->btf_vmlinux,
-				 "vmlinux", 1, cands);
-	if (err) {
-		bpf_core_free_cands(cands);
-		return ERR_PTR(err);
+	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
+	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
+	if (err)
+		goto err_out;
+
+	/* if vmlinux BTF has any candidate, don't got for module BTFs */
+	if (cands->len)
+		return cands;
+
+	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
+	if (obj->btf_vmlinux_override)
+		return cands;
+
+	/* now look through module BTFs, trying to still find candidates */
+	err = load_module_btfs(obj);
+	if (err)
+		goto err_out;
+
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		err = bpf_core_add_cands(&local_cand, local_essent_len,
+					 obj->btf_modules[i].btf,
+					 obj->btf_modules[i].name,
+					 btf__get_nr_types(obj->btf_vmlinux) + 1,
+					 cands);
+		if (err)
+			goto err_out;
 	}
 
 	return cands;
+err_out:
+	bpf_core_free_cands(cands);
+	return ERR_PTR(err);
 }
 
 /* Check two types for compatibility for the purpose of field access
@@ -5756,7 +5907,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
 		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
 		if (IS_ERR(cands)) {
-			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
 				prog->name, relo_idx, local_id, btf_kind_str(local_type),
 				local_name, PTR_ERR(cands));
 			return PTR_ERR(cands);
@@ -5944,7 +6095,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	}
 
 out:
-	/* obj->btf_vmlinux is freed at the end of object load phase */
+	/* obj->btf_vmlinux and module BTFs are freed after object load */
 	btf__free(obj->btf_vmlinux_override);
 	obj->btf_vmlinux_override = NULL;
 
@@ -7316,6 +7467,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
 	err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
+	/* clean up module BTFs */
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		btf__free(obj->btf_modules[i].btf);
+		free(obj->btf_modules[i].name);
+	}
+	free(obj->btf_modules);
+
+	/* clean up vmlinux BTF */
 	btf__free(obj->btf_vmlinux);
 	obj->btf_vmlinux = NULL;
 
-- 
cgit v1.2.3


From 9f7fa225894c7fcb014f3699a402fcc4d896cb1c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:26 -0800
Subject: selftests/bpf: Add bpf_testmod kernel module for testing

Add bpf_testmod module, which is conceptually out-of-tree module and provides
ways for selftests/bpf to test various kernel module-related functionality:
raw tracepoint, fentry/fexit/fmod_ret, etc. This module will be auto-loaded by
test_progs test runner and expected by some of selftests to be present and
loaded.

Pahole currently isn't able to generate BTF for static functions in kernel
modules, so make sure traced function is global.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-7-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore             |  1 +
 tools/testing/selftests/bpf/Makefile               | 12 +++--
 tools/testing/selftests/bpf/bpf_testmod/.gitignore |  6 +++
 tools/testing/selftests/bpf/bpf_testmod/Makefile   | 20 ++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 36 +++++++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        | 52 +++++++++++++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod.h        | 14 +++++
 tools/testing/selftests/bpf/test_progs.c           | 59 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           |  1 +
 9 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/.gitignore
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/Makefile
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 395ae040ce1f..752d8edddc66 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -35,3 +35,4 @@ test_cpp
 /tools
 /runqslower
 /bench
+*.ko
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 371b022d932c..ac25ba5d0d6c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -80,7 +80,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower bench
+	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko
 
 TEST_CUSTOM_PROGS = urandom_read
 
@@ -104,6 +104,7 @@ OVERRIDE_TARGETS := 1
 override define CLEAN
 	$(call msg,CLEAN)
 	$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+	$(Q)$(MAKE) -C bpf_testmod clean
 endef
 
 include ../lib.mk
@@ -136,6 +137,11 @@ $(OUTPUT)/urandom_read: urandom_read.c
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1
 
+$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
+	$(call msg,MOD,,$@)
+	$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
+	$(Q)cp bpf_testmod/bpf_testmod.ko $@
+
 $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
@@ -388,7 +394,7 @@ TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
 			 btf_helpers.c	flow_dissector_load.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -460,4 +466,4 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
-	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
+	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
new file mode 100644
index 000000000000..ded513777281
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
@@ -0,0 +1,6 @@
+*.mod
+*.mod.c
+*.o
+.ko
+/Module.symvers
+/modules.order
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
new file mode 100644
index 000000000000..15cb36c4483a
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/Makefile
@@ -0,0 +1,20 @@
+BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko
+
+obj-m += bpf_testmod.o
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
+
+clean:
+	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
new file mode 100644
index 000000000000..b83ea448bc79
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_testmod
+
+#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _BPF_TESTMOD_EVENTS_H
+
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+
+TRACE_EVENT(bpf_testmod_test_read,
+	TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx),
+	TP_ARGS(task, ctx),
+	TP_STRUCT__entry(
+		__field(pid_t, pid)
+		__array(char, comm, TASK_COMM_LEN)
+		__field(loff_t, off)
+		__field(size_t, len)
+	),
+	TP_fast_assign(
+		__entry->pid = task->pid;
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->off = ctx->off;
+		__entry->len = ctx->len;
+	),
+	TP_printk("pid=%d comm=%s off=%llu len=%zu",
+		  __entry->pid, __entry->comm, __entry->off, __entry->len)
+);
+
+#endif /* _BPF_TESTMOD_EVENTS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE bpf_testmod-events
+#include <trace/define_trace.h>
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
new file mode 100644
index 000000000000..2df19d73ca49
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/error-injection.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+
+#define CREATE_TRACE_POINTS
+#include "bpf_testmod-events.h"
+
+noinline ssize_t
+bpf_testmod_test_read(struct file *file, struct kobject *kobj,
+		      struct bin_attribute *bin_attr,
+		      char *buf, loff_t off, size_t len)
+{
+	struct bpf_testmod_test_read_ctx ctx = {
+		.buf = buf,
+		.off = off,
+		.len = len,
+	};
+
+	trace_bpf_testmod_test_read(current, &ctx);
+
+	return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_read);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
+
+static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
+	.attr = { .name = "bpf_testmod", .mode = 0444, },
+	.read = bpf_testmod_test_read,
+};
+
+static int bpf_testmod_init(void)
+{
+	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+static void bpf_testmod_exit(void)
+{
+	return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+module_init(bpf_testmod_init);
+module_exit(bpf_testmod_exit);
+
+MODULE_AUTHOR("Andrii Nakryiko");
+MODULE_DESCRIPTION("BPF selftests module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
new file mode 100644
index 000000000000..b81adfedb4f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef _BPF_TESTMOD_H
+#define _BPF_TESTMOD_H
+
+#include <linux/types.h>
+
+struct bpf_testmod_test_read_ctx {
+	char *buf;
+	loff_t off;
+	size_t len;
+};
+
+#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 22943b58d752..17587754b7a7 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -360,6 +360,58 @@ err:
 	return -1;
 }
 
+static int finit_module(int fd, const char *param_values, int flags)
+{
+	return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int delete_module(const char *name, int flags)
+{
+	return syscall(__NR_delete_module, name, flags);
+}
+
+static void unload_bpf_testmod(void)
+{
+	if (delete_module("bpf_testmod", 0)) {
+		if (errno == ENOENT) {
+			if (env.verbosity > VERBOSE_NONE)
+				fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
+			return;
+		}
+		fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
+		exit(1);
+	}
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
+}
+
+static int load_bpf_testmod(void)
+{
+	int fd;
+
+	/* ensure previous instance of the module is unloaded */
+	unload_bpf_testmod();
+
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Loading bpf_testmod.ko...\n");
+
+	fd = open("bpf_testmod.ko", O_RDONLY);
+	if (fd < 0) {
+		fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
+		return -ENOENT;
+	}
+	if (finit_module(fd, "", 0)) {
+		fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
+		close(fd);
+		return -EINVAL;
+	}
+	close(fd);
+
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
+	return 0;
+}
+
 /* extern declarations for test funcs */
 #define DEFINE_TEST(name) extern void test_##name(void);
 #include <prog_tests/tests.h>
@@ -678,6 +730,11 @@ int main(int argc, char **argv)
 
 	save_netns();
 	stdio_hijack();
+	env.has_testmod = true;
+	if (load_bpf_testmod()) {
+		fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+		env.has_testmod = false;
+	}
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
 
@@ -722,6 +779,8 @@ int main(int argc, char **argv)
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
+	if (env.has_testmod)
+		unload_bpf_testmod();
 	stdio_restore();
 
 	if (env.get_test_cnt) {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index d6b14853f3bc..115953243f62 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,7 @@ struct test_env {
 	enum verbosity verbosity;
 
 	bool jit_enabled;
+	bool has_testmod;
 	bool get_test_cnt;
 	bool list_test_names;
 
-- 
cgit v1.2.3


From 5ed31472b9ad6373a0a24bc21186b5eac999213d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:27 -0800
Subject: selftests/bpf: Add support for marking sub-tests as skipped

Previously skipped sub-tests would be counted as passing with ":OK" appened
in the log. Change that to be accounted as ":SKIP".

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-8-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_progs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 17587754b7a7..5ef081bdae4e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -149,15 +149,15 @@ void test__end_subtest()
 
 	if (sub_error_cnt)
 		env.fail_cnt++;
-	else
+	else if (test->skip_cnt == 0)
 		env.sub_succ_cnt++;
 	skip_account();
 
 	dump_test_log(test, sub_error_cnt);
 
 	fprintf(env.stdout, "#%d/%d %s:%s\n",
-	       test->test_num, test->subtest_num,
-	       test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+	       test->test_num, test->subtest_num, test->subtest_name,
+	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
 
 	free(test->subtest_name);
 	test->subtest_name = NULL;
-- 
cgit v1.2.3


From 6bcd39d366b64318562785d5b47c2837e3a53ae5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:28 -0800
Subject: selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF

Add a self-tests validating libbpf is able to perform CO-RE relocations
against the type defined in kernel module BTF. if bpf_testmod.o is not
supported by the kernel (e.g., due to version mismatch), skip tests, instead
of failing.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-9-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 79 +++++++++++++++++++---
 .../testing/selftests/bpf/progs/core_reloc_types.h | 17 +++++
 .../selftests/bpf/progs/test_core_reloc_module.c   | 66 ++++++++++++++++++
 3 files changed, 151 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_module.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 30e40ff4b0d8..bb980848cd77 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include "progs/core_reloc_types.h"
+#include "bpf_testmod/bpf_testmod.h"
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <bpf/btf.h>
@@ -9,6 +10,30 @@ static int duration = 0;
 
 #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
 
+#define MODULES_CASE(name, sec_name, tp_name) {				\
+	.case_name = name,						\
+	.bpf_obj_file = "test_core_reloc_module.o",			\
+	.btf_src_file = NULL, /* find in kernel module BTFs */		\
+	.input = "",							\
+	.input_len = 0,							\
+	.output = STRUCT_TO_CHAR_PTR(core_reloc_module_output) {	\
+		.read_ctx_sz = sizeof(struct bpf_testmod_test_read_ctx),\
+		.read_ctx_exists = true,				\
+		.buf_exists = true,					\
+		.len_exists = true,					\
+		.off_exists = true,					\
+		.len = 123,						\
+		.off = 0,						\
+		.comm = "test_progs",					\
+		.comm_len = sizeof("test_progs"),			\
+	},								\
+	.output_len = sizeof(struct core_reloc_module_output),		\
+	.prog_sec_name = sec_name,					\
+	.raw_tp_name = tp_name,						\
+	.trigger = trigger_module_test_read,				\
+	.needs_testmod = true,						\
+}
+
 #define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) {	\
 	.a = 42,							\
 	.b = 0xc001,							\
@@ -211,7 +236,7 @@ static int duration = 0;
 	.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output)	\
 		__VA_ARGS__,						\
 	.output_len = sizeof(struct core_reloc_bitfields_output),	\
-	.direct_raw_tp = true,						\
+	.prog_sec_name = "tp_btf/sys_enter",				\
 }
 
 
@@ -222,7 +247,7 @@ static int duration = 0;
 }, {									\
 	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o",	\
 			      "direct:", name),				\
-	.direct_raw_tp = true,						\
+	.prog_sec_name = "tp_btf/sys_enter",				\
 	.fails = true,							\
 }
 
@@ -309,6 +334,7 @@ static int duration = 0;
 struct core_reloc_test_case;
 
 typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+typedef int (*trigger_test_fn)(const struct core_reloc_test_case *test);
 
 struct core_reloc_test_case {
 	const char *case_name;
@@ -319,9 +345,12 @@ struct core_reloc_test_case {
 	const char *output;
 	int output_len;
 	bool fails;
+	bool needs_testmod;
 	bool relaxed_core_relocs;
-	bool direct_raw_tp;
+	const char *prog_sec_name;
+	const char *raw_tp_name;
 	setup_test_fn setup;
+	trigger_test_fn trigger;
 };
 
 static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
@@ -451,6 +480,23 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test)
 	return 0;
 }
 
+static int trigger_module_test_read(const struct core_reloc_test_case *test)
+{
+	struct core_reloc_module_output *exp = (void *)test->output;
+	int fd, err;
+
+	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	err = -errno;
+	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+		return err;
+
+	read(fd, NULL, exp->len); /* request expected number of bytes */
+	close(fd);
+
+	return 0;
+}
+
+
 static struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
@@ -467,6 +513,9 @@ static struct core_reloc_test_case test_cases[] = {
 		.output_len = sizeof(struct core_reloc_kernel_output),
 	},
 
+	/* validate we can find kernel module BTF types for relocs/attach */
+	MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+
 	/* validate BPF program can use multiple flavors to match against
 	 * single target BTF type
 	 */
@@ -779,6 +828,11 @@ void test_core_reloc(void)
 		if (!test__start_subtest(test_case->case_name))
 			continue;
 
+		if (test_case->needs_testmod && !env.has_testmod) {
+			test__skip();
+			continue;
+		}
+
 		if (test_case->setup) {
 			err = test_case->setup(test_case);
 			if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
@@ -790,13 +844,11 @@ void test_core_reloc(void)
 			  test_case->bpf_obj_file, PTR_ERR(obj)))
 			continue;
 
-		/* for typed raw tracepoints, NULL should be specified */
-		if (test_case->direct_raw_tp) {
-			probe_name = "tp_btf/sys_enter";
-			tp_name = NULL;
-		} else {
-			probe_name = "raw_tracepoint/sys_enter";
-			tp_name = "sys_enter";
+		probe_name = "raw_tracepoint/sys_enter";
+		tp_name = "sys_enter";
+		if (test_case->prog_sec_name) {
+			probe_name = test_case->prog_sec_name;
+			tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
 		}
 
 		prog = bpf_object__find_program_by_title(obj, probe_name);
@@ -837,7 +889,12 @@ void test_core_reloc(void)
 			goto cleanup;
 
 		/* trigger test run */
-		usleep(1);
+		if (test_case->trigger) {
+			if (!ASSERT_OK(test_case->trigger(test_case), "test_trigger"))
+				goto cleanup;
+		} else {
+			usleep(1);
+		}
 
 		if (data->skip) {
 			test__skip();
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index e6e616cb7bc9..9a2850850121 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -15,6 +15,23 @@ struct core_reloc_kernel_output {
 	int comm_len;
 };
 
+/*
+ * MODULE
+ */
+
+struct core_reloc_module_output {
+	long long len;
+	long long off;
+	int read_ctx_sz;
+	bool read_ctx_exists;
+	bool buf_exists;
+	bool len_exists;
+	bool off_exists;
+	/* we have test_progs[-flavor], so cut flavor part */
+	char comm[sizeof("test_progs")];
+	int comm_len;
+};
+
 /*
  * FLAVORS
  */
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
new file mode 100644
index 000000000000..d1840c1a9d36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_testmod_test_read_ctx {
+	/* field order is mixed up */
+	size_t len;
+	char *buf;
+	loff_t off;
+} __attribute__((preserve_access_index));
+
+struct {
+	char in[256];
+	char out[256];
+	bool skip;
+	uint64_t my_pid_tgid;
+} data = {};
+
+struct core_reloc_module_output {
+	long long len;
+	long long off;
+	int read_ctx_sz;
+	bool read_ctx_exists;
+	bool buf_exists;
+	bool len_exists;
+	bool off_exists;
+	/* we have test_progs[-flavor], so cut flavor part */
+	char comm[sizeof("test_progs")];
+	int comm_len;
+};
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(test_core_module,
+	     struct task_struct *task,
+	     struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	struct core_reloc_module_output *out = (void *)&data.out;
+	__u64 pid_tgid = bpf_get_current_pid_tgid();
+	__u32 real_tgid = (__u32)(pid_tgid >> 32);
+	__u32 real_pid = (__u32)pid_tgid;
+
+	if (data.my_pid_tgid != pid_tgid)
+		return 0;
+
+	if (BPF_CORE_READ(task, pid) != real_pid || BPF_CORE_READ(task, tgid) != real_tgid)
+		return 0;
+
+	out->len = BPF_CORE_READ(read_ctx, len);
+	out->off = BPF_CORE_READ(read_ctx, off);
+
+	out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+	out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+	out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+	out->off_exists = bpf_core_field_exists(read_ctx->off);
+	out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:30 -0800
Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs

Add ability for user-space programs to specify non-vmlinux BTF when attaching
BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this,
attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD
of a module or vmlinux BTF object. For backwards compatibility reasons,
0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org
---
 tools/include/uapi/linux/bpf.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c3458ec1f30a..1233f14f659f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -557,7 +557,12 @@ union bpf_attr {
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
-		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
cgit v1.2.3


From 6aef10a481a3f42c8021fe410e07440c0d71a5fc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:31 -0800
Subject: libbpf: Factor out low-level BPF program loading helper

Refactor low-level API for BPF program loading to not rely on public API
types. This allows painless extension without constant efforts to cleverly not
break backwards compatibility.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-12-andrii@kernel.org
---
 tools/lib/bpf/bpf.c             | 100 +++++++++++++++++++++++++++-------------
 tools/lib/bpf/libbpf.c          |  34 +++++++-------
 tools/lib/bpf/libbpf_internal.h |  29 ++++++++++++
 3 files changed, 113 insertions(+), 50 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 4025266d0fb0..5d681ce32b37 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -215,59 +215,52 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
 	return info;
 }
 
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz)
+int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 {
 	void *finfo = NULL, *linfo = NULL;
 	union bpf_attr attr;
-	__u32 log_level;
 	int fd;
 
-	if (!load_attr || !log_buf != !log_buf_sz)
+	if (!load_attr->log_buf != !load_attr->log_buf_sz)
 		return -EINVAL;
 
-	log_level = load_attr->log_level;
-	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+	if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
 		return -EINVAL;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
-	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
-	    attr.prog_type == BPF_PROG_TYPE_LSM) {
-		attr.attach_btf_id = load_attr->attach_btf_id;
-	} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
-		   attr.prog_type == BPF_PROG_TYPE_EXT) {
-		attr.attach_btf_id = load_attr->attach_btf_id;
-		attr.attach_prog_fd = load_attr->attach_prog_fd;
-	} else {
-		attr.prog_ifindex = load_attr->prog_ifindex;
-		attr.kern_version = load_attr->kern_version;
-	}
-	attr.insn_cnt = (__u32)load_attr->insns_cnt;
+
+	attr.attach_btf_id = load_attr->attach_btf_id;
+	attr.attach_prog_fd = load_attr->attach_prog_fd;
+
+	attr.prog_ifindex = load_attr->prog_ifindex;
+	attr.kern_version = load_attr->kern_version;
+
+	attr.insn_cnt = (__u32)load_attr->insn_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
 	attr.license = ptr_to_u64(load_attr->license);
 
-	attr.log_level = log_level;
-	if (log_level) {
-		attr.log_buf = ptr_to_u64(log_buf);
-		attr.log_size = log_buf_sz;
-	} else {
-		attr.log_buf = ptr_to_u64(NULL);
-		attr.log_size = 0;
+	attr.log_level = load_attr->log_level;
+	if (attr.log_level) {
+		attr.log_buf = ptr_to_u64(load_attr->log_buf);
+		attr.log_size = load_attr->log_buf_sz;
 	}
 
 	attr.prog_btf_fd = load_attr->prog_btf_fd;
+	attr.prog_flags = load_attr->prog_flags;
+
 	attr.func_info_rec_size = load_attr->func_info_rec_size;
 	attr.func_info_cnt = load_attr->func_info_cnt;
 	attr.func_info = ptr_to_u64(load_attr->func_info);
+
 	attr.line_info_rec_size = load_attr->line_info_rec_size;
 	attr.line_info_cnt = load_attr->line_info_cnt;
 	attr.line_info = ptr_to_u64(load_attr->line_info);
+
 	if (load_attr->name)
 		memcpy(attr.prog_name, load_attr->name,
-		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
-	attr.prog_flags = load_attr->prog_flags;
+		       min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1));
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 	if (fd >= 0)
@@ -307,19 +300,19 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		}
 
 		fd = sys_bpf_prog_load(&attr, sizeof(attr));
-
 		if (fd >= 0)
 			goto done;
 	}
 
-	if (log_level || !log_buf)
+	if (load_attr->log_level || !load_attr->log_buf)
 		goto done;
 
 	/* Try again with log */
-	attr.log_buf = ptr_to_u64(log_buf);
-	attr.log_size = log_buf_sz;
+	attr.log_buf = ptr_to_u64(load_attr->log_buf);
+	attr.log_size = load_attr->log_buf_sz;
 	attr.log_level = 1;
-	log_buf[0] = 0;
+	load_attr->log_buf[0] = 0;
+
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 done:
 	free(finfo);
@@ -327,6 +320,49 @@ done:
 	return fd;
 }
 
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz)
+{
+	struct bpf_prog_load_params p = {};
+
+	if (!load_attr || !log_buf != !log_buf_sz)
+		return -EINVAL;
+
+	p.prog_type = load_attr->prog_type;
+	p.expected_attach_type = load_attr->expected_attach_type;
+	switch (p.prog_type) {
+	case BPF_PROG_TYPE_STRUCT_OPS:
+	case BPF_PROG_TYPE_LSM:
+		p.attach_btf_id = load_attr->attach_btf_id;
+		break;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_EXT:
+		p.attach_btf_id = load_attr->attach_btf_id;
+		p.attach_prog_fd = load_attr->attach_prog_fd;
+		break;
+	default:
+		p.prog_ifindex = load_attr->prog_ifindex;
+		p.kern_version = load_attr->kern_version;
+	}
+	p.insn_cnt = load_attr->insns_cnt;
+	p.insns = load_attr->insns;
+	p.license = load_attr->license;
+	p.log_level = load_attr->log_level;
+	p.log_buf = log_buf;
+	p.log_buf_sz = log_buf_sz;
+	p.prog_btf_fd = load_attr->prog_btf_fd;
+	p.func_info_rec_size = load_attr->func_info_rec_size;
+	p.func_info_cnt = load_attr->func_info_cnt;
+	p.func_info = load_attr->func_info;
+	p.line_info_rec_size = load_attr->line_info_rec_size;
+	p.line_info_cnt = load_attr->line_info_cnt;
+	p.line_info = load_attr->line_info;
+	p.name = load_attr->name;
+	p.prog_flags = load_attr->prog_flags;
+
+	return libbpf__bpf_prog_load(&p);
+}
+
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ca20e493726d..103d66e27406 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6809,7 +6809,7 @@ static int
 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	     char *license, __u32 kern_version, int *pfd)
 {
-	struct bpf_load_program_attr load_attr;
+	struct bpf_prog_load_params load_attr = {};
 	char *cp, errmsg[STRERR_BUFSIZE];
 	size_t log_buf_size = 0;
 	char *log_buf = NULL;
@@ -6828,7 +6828,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	if (!insns || !insns_cnt)
 		return -EINVAL;
 
-	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
 	/* old kernels might not support specifying expected_attach_type */
 	if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
@@ -6839,19 +6838,14 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	if (kernel_supports(FEAT_PROG_NAME))
 		load_attr.name = prog->name;
 	load_attr.insns = insns;
-	load_attr.insns_cnt = insns_cnt;
+	load_attr.insn_cnt = insns_cnt;
 	load_attr.license = license;
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
-	    prog->type == BPF_PROG_TYPE_LSM) {
-		load_attr.attach_btf_id = prog->attach_btf_id;
-	} else if (prog->type == BPF_PROG_TYPE_TRACING ||
-		   prog->type == BPF_PROG_TYPE_EXT) {
-		load_attr.attach_prog_fd = prog->attach_prog_fd;
-		load_attr.attach_btf_id = prog->attach_btf_id;
-	} else {
-		load_attr.kern_version = kern_version;
-		load_attr.prog_ifindex = prog->prog_ifindex;
-	}
+	load_attr.attach_btf_id = prog->attach_btf_id;
+	load_attr.attach_prog_fd = prog->attach_prog_fd;
+	load_attr.attach_btf_id = prog->attach_btf_id;
+	load_attr.kern_version = kern_version;
+	load_attr.prog_ifindex = prog->prog_ifindex;
+
 	/* specify func_info/line_info only if kernel supports them */
 	btf_fd = bpf_object__btf_fd(prog->obj);
 	if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
@@ -6875,7 +6869,9 @@ retry_load:
 		*log_buf = 0;
 	}
 
-	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
+	load_attr.log_buf = log_buf;
+	load_attr.log_buf_sz = log_buf_size;
+	ret = libbpf__bpf_prog_load(&load_attr);
 
 	if (ret >= 0) {
 		if (log_buf && load_attr.log_level)
@@ -6916,9 +6912,9 @@ retry_load:
 		pr_warn("-- BEGIN DUMP LOG ---\n");
 		pr_warn("\n%s\n", log_buf);
 		pr_warn("-- END LOG --\n");
-	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+	} else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
 		pr_warn("Program too large (%zu insns), at most %d insns\n",
-			load_attr.insns_cnt, BPF_MAXINSNS);
+			load_attr.insn_cnt, BPF_MAXINSNS);
 		ret = -LIBBPF_ERRNO__PROG2BIG;
 	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
 		/* Wrong program type? */
@@ -6926,7 +6922,9 @@ retry_load:
 
 		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
 		load_attr.expected_attach_type = 0;
-		fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+		load_attr.log_buf = NULL;
+		load_attr.log_buf_sz = 0;
+		fd = libbpf__bpf_prog_load(&load_attr);
 		if (fd >= 0) {
 			close(fd);
 			ret = -LIBBPF_ERRNO__PROGTYPE;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index e569ae63808e..681073a67ae3 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -151,6 +151,35 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len);
 
+struct bpf_prog_load_params {
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	const char *name;
+	const struct bpf_insn *insns;
+	size_t insn_cnt;
+	const char *license;
+	__u32 kern_version;
+	__u32 attach_prog_fd;
+	__u32 attach_btf_id;
+	__u32 prog_ifindex;
+	__u32 prog_btf_fd;
+	__u32 prog_flags;
+
+	__u32 func_info_rec_size;
+	const void *func_info;
+	__u32 func_info_cnt;
+
+	__u32 line_info_rec_size;
+	const void *line_info;
+	__u32 line_info_cnt;
+
+	__u32 log_level;
+	char *log_buf;
+	size_t log_buf_sz;
+};
+
+int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr);
+
 int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
-- 
cgit v1.2.3


From 91abb4a6d79df6c4dcd86d85632df53c8cca2dcf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:32 -0800
Subject: libbpf: Support attachment of BPF tracing programs to kernel modules

Teach libbpf to search for BTF types in kernel modules for tracing BPF
programs. This allows attachment of raw_tp/fentry/fexit/fmod_ret/etc BPF
program types to tracepoints and functions in kernel modules.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-13-andrii@kernel.org
---
 tools/lib/bpf/bpf.c             |   5 +-
 tools/lib/bpf/libbpf.c          | 138 +++++++++++++++++++++++++++++++---------
 tools/lib/bpf/libbpf_internal.h |   1 +
 3 files changed, 112 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5d681ce32b37..bba48ff4c5c0 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -231,8 +231,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
 
+	if (load_attr->attach_prog_fd)
+		attr.attach_prog_fd = load_attr->attach_prog_fd;
+	else
+		attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd;
 	attr.attach_btf_id = load_attr->attach_btf_id;
-	attr.attach_prog_fd = load_attr->attach_prog_fd;
 
 	attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.kern_version = load_attr->kern_version;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 103d66e27406..912e01b946fe 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -278,6 +278,7 @@ struct bpf_program {
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
 	int prog_ifindex;
+	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
 	void *func_info;
@@ -408,6 +409,7 @@ struct module_btf {
 	struct btf *btf;
 	char *name;
 	__u32 id;
+	int fd;
 };
 
 struct bpf_object {
@@ -4766,8 +4768,7 @@ static int load_module_btfs(struct bpf_object *obj)
 		if (err) {
 			err = -errno;
 			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
-			close(fd);
-			return err;
+			goto err_out;
 		}
 
 		/* ignore non-module BTFs */
@@ -4777,25 +4778,33 @@ static int load_module_btfs(struct bpf_object *obj)
 		}
 
 		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
-		close(fd);
 		if (IS_ERR(btf)) {
 			pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
 				name, id, PTR_ERR(btf));
-			return PTR_ERR(btf);
+			err = PTR_ERR(btf);
+			goto err_out;
 		}
 
 		err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 				     sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 		if (err)
-			return err;
+			goto err_out;
 
 		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
 
 		mod_btf->btf = btf;
 		mod_btf->id = id;
+		mod_btf->fd = fd;
 		mod_btf->name = strdup(name);
-		if (!mod_btf->name)
-			return -ENOMEM;
+		if (!mod_btf->name) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		continue;
+
+err_out:
+		close(fd);
+		return err;
 	}
 
 	return 0;
@@ -6841,7 +6850,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.insn_cnt = insns_cnt;
 	load_attr.license = license;
 	load_attr.attach_btf_id = prog->attach_btf_id;
-	load_attr.attach_prog_fd = prog->attach_prog_fd;
+	if (prog->attach_prog_fd)
+		load_attr.attach_prog_fd = prog->attach_prog_fd;
+	else
+		load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
 	load_attr.attach_btf_id = prog->attach_btf_id;
 	load_attr.kern_version = kern_version;
 	load_attr.prog_ifindex = prog->prog_ifindex;
@@ -6937,11 +6949,11 @@ out:
 	return ret;
 }
 
-static int libbpf_find_attach_btf_id(struct bpf_program *prog);
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
 
 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
-	int err = 0, fd, i, btf_id;
+	int err = 0, fd, i;
 
 	if (prog->obj->loaded) {
 		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
@@ -6951,10 +6963,14 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 	if ((prog->type == BPF_PROG_TYPE_TRACING ||
 	     prog->type == BPF_PROG_TYPE_LSM ||
 	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
-		btf_id = libbpf_find_attach_btf_id(prog);
-		if (btf_id <= 0)
-			return btf_id;
-		prog->attach_btf_id = btf_id;
+		int btf_obj_fd = 0, btf_type_id = 0;
+
+		err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
+		if (err)
+			return err;
+
+		prog->attach_btf_obj_fd = btf_obj_fd;
+		prog->attach_btf_id = btf_type_id;
 	}
 
 	if (prog->instances.nr < 0 || !prog->instances.fds) {
@@ -7467,6 +7483,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 
 	/* clean up module BTFs */
 	for (i = 0; i < obj->btf_module_cnt; i++) {
+		close(obj->btf_modules[i].fd);
 		btf__free(obj->btf_modules[i].btf);
 		free(obj->btf_modules[i].name);
 	}
@@ -8821,8 +8838,8 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 	return btf__find_by_name_kind(btf, btf_type_name, kind);
 }
 
-static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
-					enum bpf_attach_type attach_type)
+static inline int find_attach_btf_id(struct btf *btf, const char *name,
+				     enum bpf_attach_type attach_type)
 {
 	int err;
 
@@ -8838,9 +8855,6 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
 	else
 		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 
-	if (err <= 0)
-		pr_warn("%s is not found in vmlinux BTF\n", name);
-
 	return err;
 }
 
@@ -8856,7 +8870,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 		return -EINVAL;
 	}
 
-	err = __find_vmlinux_btf_id(btf, name, attach_type);
+	err = find_attach_btf_id(btf, name, attach_type);
+	if (err <= 0)
+		pr_warn("%s is not found in vmlinux BTF\n", name);
+
 	btf__free(btf);
 	return err;
 }
@@ -8894,11 +8911,49 @@ out:
 	return err;
 }
 
-static int libbpf_find_attach_btf_id(struct bpf_program *prog)
+static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
+			      enum bpf_attach_type attach_type,
+			      int *btf_obj_fd, int *btf_type_id)
+{
+	int ret, i;
+
+	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
+	if (ret > 0) {
+		*btf_obj_fd = 0; /* vmlinux BTF */
+		*btf_type_id = ret;
+		return 0;
+	}
+	if (ret != -ENOENT)
+		return ret;
+
+	ret = load_module_btfs(obj);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		const struct module_btf *mod = &obj->btf_modules[i];
+
+		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
+		if (ret > 0) {
+			*btf_obj_fd = mod->fd;
+			*btf_type_id = ret;
+			return 0;
+		}
+		if (ret == -ENOENT)
+			continue;
+
+		return ret;
+	}
+
+	return -ESRCH;
+}
+
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id)
 {
 	enum bpf_attach_type attach_type = prog->expected_attach_type;
 	__u32 attach_prog_fd = prog->attach_prog_fd;
-	const char *name = prog->sec_name;
+	const char *name = prog->sec_name, *attach_name;
+	const struct bpf_sec_def *sec = NULL;
 	int i, err;
 
 	if (!name)
@@ -8909,17 +8964,37 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
 			continue;
 		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
 			continue;
-		if (attach_prog_fd)
-			err = libbpf_find_prog_btf_id(name + section_defs[i].len,
-						      attach_prog_fd);
-		else
-			err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
-						    name + section_defs[i].len,
-						    attach_type);
+
+		sec = &section_defs[i];
+		break;
+	}
+
+	if (!sec) {
+		pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
+		return -ESRCH;
+	}
+	attach_name = name + sec->len;
+
+	/* BPF program's BTF ID */
+	if (attach_prog_fd) {
+		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+		if (err < 0) {
+			pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
+				 attach_prog_fd, attach_name, err);
+			return err;
+		}
+		*btf_obj_fd = 0;
+		*btf_type_id = err;
+		return 0;
+	}
+
+	/* kernel/module BTF ID */
+	err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+	if (err) {
+		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
 		return err;
 	}
-	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
-	return -ESRCH;
+	return 0;
 }
 
 int libbpf_attach_type_by_name(const char *name,
@@ -10808,6 +10883,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 		return btf_id;
 
 	prog->attach_btf_id = btf_id;
+	prog->attach_btf_obj_fd = 0;
 	prog->attach_prog_fd = attach_prog_fd;
 	return 0;
 }
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 681073a67ae3..969d0ac592ba 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -160,6 +160,7 @@ struct bpf_prog_load_params {
 	const char *license;
 	__u32 kern_version;
 	__u32 attach_prog_fd;
+	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 prog_ifindex;
 	__u32 prog_btf_fd;
-- 
cgit v1.2.3


From bc9ed69c79ae7577314a24e09c5b0d1c1c314ced Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:33 -0800
Subject: selftests/bpf: Add tp_btf CO-RE reloc test for modules

Add another CO-RE relocation test for kernel module relocations. This time for
tp_btf with direct memory reads.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-14-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  |  3 +-
 .../selftests/bpf/progs/test_core_reloc_module.c   | 32 +++++++++++++++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index bb980848cd77..06eb956ff7bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -514,7 +514,8 @@ static struct core_reloc_test_case test_cases[] = {
 	},
 
 	/* validate we can find kernel module BTF types for relocs/attach */
-	MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+	MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+	MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL),
 
 	/* validate BPF program can use multiple flavors to match against
 	 * single target BTF type
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index d1840c1a9d36..56363959f7b0 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -36,7 +36,7 @@ struct core_reloc_module_output {
 };
 
 SEC("raw_tp/bpf_testmod_test_read")
-int BPF_PROG(test_core_module,
+int BPF_PROG(test_core_module_probed,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
@@ -64,3 +64,33 @@ int BPF_PROG(test_core_module,
 
 	return 0;
 }
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(test_core_module_direct,
+	     struct task_struct *task,
+	     struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	struct core_reloc_module_output *out = (void *)&data.out;
+	__u64 pid_tgid = bpf_get_current_pid_tgid();
+	__u32 real_tgid = (__u32)(pid_tgid >> 32);
+	__u32 real_pid = (__u32)pid_tgid;
+
+	if (data.my_pid_tgid != pid_tgid)
+		return 0;
+
+	if (task->pid != real_pid || task->tgid != real_tgid)
+		return 0;
+
+	out->len = read_ctx->len;
+	out->off = read_ctx->off;
+
+	out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+	out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+	out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+	out->off_exists = bpf_core_field_exists(read_ctx->off);
+	out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 1e38abefcfd65f3ef7b12895dfd48db80aca28da Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:34 -0800
Subject: selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module

Add new selftest checking attachment of fentry/fexit/fmod_ret (and raw
tracepoint ones for completeness) BPF programs to kernel module function.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-15-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/module_attach.c       | 53 +++++++++++++++++
 .../selftests/bpf/progs/test_module_attach.c       | 66 ++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/module_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_module_attach.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
new file mode 100644
index 000000000000..4b65e9918764
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_module_attach.skel.h"
+
+static int duration;
+
+static int trigger_module_test_read(int read_sz)
+{
+	int fd, err;
+
+	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	err = -errno;
+	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+		return err;
+
+	read(fd, NULL, read_sz);
+	close(fd);
+
+	return 0;
+}
+
+void test_module_attach(void)
+{
+	const int READ_SZ = 456;
+	struct test_module_attach* skel;
+	struct test_module_attach__bss *bss;
+	int err;
+
+	skel = test_module_attach__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	bss = skel->bss;
+
+	err = test_module_attach__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read");
+
+	ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
+	ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
+	ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+	ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
+	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
+	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
+
+cleanup:
+	test_module_attach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
new file mode 100644
index 000000000000..b563563df172
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+__u32 raw_tp_read_sz = 0;
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(handle_raw_tp,
+	     struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	raw_tp_read_sz = BPF_CORE_READ(read_ctx, len);
+	return 0;
+}
+
+__u32 tp_btf_read_sz = 0;
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(handle_tp_btf,
+	     struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	tp_btf_read_sz = read_ctx->len;
+	return 0;
+}
+
+__u32 fentry_read_sz = 0;
+
+SEC("fentry/bpf_testmod_test_read")
+int BPF_PROG(handle_fentry,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fentry_read_sz = len;
+	return 0;
+}
+
+__u32 fexit_read_sz = 0;
+int fexit_ret = 0;
+
+SEC("fexit/bpf_testmod_test_read")
+int BPF_PROG(handle_fexit,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len,
+	     int ret)
+{
+	fexit_read_sz = len;
+	fexit_ret = ret;
+	return 0;
+}
+
+__u32 fmod_ret_read_sz = 0;
+
+SEC("fmod_ret/bpf_testmod_test_read")
+int BPF_PROG(handle_fmod_ret,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fmod_ret_read_sz = len;
+	return 0; /* don't override the exit code */
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 3015b500ae42356936b9b4a8b660eacaee7a6147 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 15:54:39 -0800
Subject: libbpf: Use memcpy instead of strncpy to please GCC

Some versions of GCC are really nit-picky about strncpy() use. Use memcpy(),
as they are pretty much equivalent for the case of fixed length strings.

Fixes: e459f49b4394 ("libbpf: Separate XDP program load with xsk socket creation")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203235440.2302137-1-andrii@kernel.org
---
 tools/lib/bpf/xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 4b051ec7cfbb..e3e41ceeb1bc 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -583,7 +583,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 	}
 
 	ctx->ifindex = ifindex;
-	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	memcpy(ctx->ifname, ifname, IFNAMSIZ -1);
 	ctx->ifname[IFNAMSIZ - 1] = 0;
 
 	xsk->ctx = ctx;
-- 
cgit v1.2.3


From eceae70bdeaeb6b8ceb662983cf663ff352fbc96 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 15:54:40 -0800
Subject: selftests/bpf: Fix invalid use of strncat in test_sockmap

strncat()'s third argument is how many bytes will be added *in addition* to
already existing bytes in destination. Plus extra zero byte will be added
after that. So existing use in test_sockmap has many opportunities to overflow
the string and cause memory corruptions. And in this case, GCC complains for
a good reason.

Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
Fixes: 73563aa3d977 ("selftests/bpf: test_sockmap, print additional test options")
Fixes: 1ade9abadfca ("bpf: test_sockmap, add options for msg_pop_data() helper")
Fixes: 463bac5f1ca7 ("bpf, selftests: Add test for ktls with skb bpf ingress policy")
Fixes: e9dd904708c4 ("bpf: add tls support for testing in test_sockmap")
Fixes: 753fb2ee0934 ("bpf: sockmap, add msg_peek tests to test_sockmap")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203235440.2302137-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_sockmap.c | 36 +++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 0fa1e421c3d7..427ca00a3217 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1273,6 +1273,16 @@ static char *test_to_str(int test)
 	return "unknown";
 }
 
+static void append_str(char *dst, const char *src, size_t dst_cap)
+{
+	size_t avail = dst_cap - strlen(dst);
+
+	if (avail <= 1) /* just zero byte could be written */
+		return;
+
+	strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */
+}
+
 #define OPTSTRING 60
 static void test_options(char *options)
 {
@@ -1281,42 +1291,42 @@ static void test_options(char *options)
 	memset(options, 0, OPTSTRING);
 
 	if (txmsg_pass)
-		strncat(options, "pass,", OPTSTRING);
+		append_str(options, "pass,", OPTSTRING);
 	if (txmsg_redir)
-		strncat(options, "redir,", OPTSTRING);
+		append_str(options, "redir,", OPTSTRING);
 	if (txmsg_drop)
-		strncat(options, "drop,", OPTSTRING);
+		append_str(options, "drop,", OPTSTRING);
 	if (txmsg_apply) {
 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_cork) {
 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_start) {
 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_end) {
 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_start_pop) {
 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_ingress)
-		strncat(options, "ingress,", OPTSTRING);
+		append_str(options, "ingress,", OPTSTRING);
 	if (txmsg_redir_skb)
-		strncat(options, "redir_skb,", OPTSTRING);
+		append_str(options, "redir_skb,", OPTSTRING);
 	if (txmsg_ktls_skb)
-		strncat(options, "ktls_skb,", OPTSTRING);
+		append_str(options, "ktls_skb,", OPTSTRING);
 	if (ktls)
-		strncat(options, "ktls,", OPTSTRING);
+		append_str(options, "ktls,", OPTSTRING);
 	if (peek_flag)
-		strncat(options, "peek,", OPTSTRING);
+		append_str(options, "peek,", OPTSTRING);
 }
 
 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
-- 
cgit v1.2.3


From 23afeaeff3d985b07abf2c76fd12b8c548da8367 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 18 Nov 2020 11:47:46 +0100
Subject: selftests: core: add tests for CLOSE_RANGE_CLOEXEC

check that close_range(initial_fd, last_fd, CLOSE_RANGE_CLOEXEC)
correctly sets the close-on-exec bit for the specified file
descriptors.

Open 100 file descriptors and set the close-on-exec flag for a subset
of them first, then set it for every file descriptor above 2.  Make
sure RLIMIT_NOFILE doesn't affect the result.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Link: https://lore.kernel.org/r/20201118104746.873084-3-gscrivan@redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 74 +++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 575b391ddc78..87e16d65d9d7 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <syscall.h>
 #include <unistd.h>
+#include <sys/resource.h>
 
 #include "../kselftest_harness.h"
 #include "../clone3/clone3_selftests.h"
@@ -23,6 +24,10 @@
 #define CLOSE_RANGE_UNSHARE	(1U << 1)
 #endif
 
+#ifndef CLOSE_RANGE_CLOEXEC
+#define CLOSE_RANGE_CLOEXEC	(1U << 2)
+#endif
+
 static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
 				  unsigned int flags)
 {
@@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
 	EXPECT_EQ(0, WEXITSTATUS(status));
 }
 
+TEST(close_range_cloexec)
+{
+	int i, ret;
+	int open_fds[101];
+	struct rlimit rlimit;
+
+	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+		int fd;
+
+		fd = open("/dev/null", O_RDONLY);
+		ASSERT_GE(fd, 0) {
+			if (errno == ENOENT)
+				XFAIL(return, "Skipping test since /dev/null does not exist");
+		}
+
+		open_fds[i] = fd;
+	}
+
+	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			XFAIL(return, "close_range() syscall not supported");
+		if (errno == EINVAL)
+			XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+	}
+
+	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
+	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+	rlimit.rlim_cur = 25;
+	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
+	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
+	ASSERT_EQ(0, ret);
+	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
+	ASSERT_EQ(0, ret);
+
+	for (i = 0; i <= 50; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	for (i = 51; i <= 74; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, 0);
+	}
+
+	for (i = 75; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	/* Test a common pattern.  */
+	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
+	for (i = 0; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+}
+
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3


From 7d17167244f5415bc6bc90f5bb0074b6d79676b4 Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Fri, 4 Dec 2020 19:18:27 +0100
Subject: selftests/bpf: Print reason when a tester could not run a program

Commit 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported
program types") added a check to skip unsupported program types. As
bpf_probe_prog_type can change errno, do_single_test should save it before
printing a reason why a supported BPF program type failed to load.

Fixes: 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types")
Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201204181828.11974-2-dev@der-flo.net
---
 tools/testing/selftests/bpf/test_verifier.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4bfe3aa2cfc4..ceea9409639e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -936,6 +936,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int run_errs, run_successes;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	int saved_errno;
 	int fixup_skips;
 	__u32 pflags;
 	int i, err;
@@ -997,6 +998,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	}
 
 	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
+	saved_errno = errno;
 
 	/* BPF_PROG_TYPE_TRACING requires more setup and
 	 * bpf_probe_prog_type won't give correct answer
@@ -1013,7 +1015,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
 		if (fd_prog < 0) {
 			printf("FAIL\nFailed to load prog '%s'!\n",
-			       strerror(errno));
+			       strerror(saved_errno));
 			goto fail_log;
 		}
 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-- 
cgit v1.2.3


From 5f61b7c6975b03e6ace2cfb13d415d5f475c8830 Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Fri, 4 Dec 2020 19:18:28 +0100
Subject: selftests/bpf: Avoid errno clobbering

Print a message when the returned error is about a program type being
not supported or because of permission problems.
These messages are expected if the program to test was actually
executed.

Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201204181828.11974-3-dev@der-flo.net
---
 tools/testing/selftests/bpf/test_verifier.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ceea9409639e..777a81404fdb 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -875,19 +875,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
 	__u8 tmp[TEST_DATA_LEN << 2];
 	__u32 size_tmp = sizeof(tmp);
 	uint32_t retval;
-	int err;
+	int err, saved_errno;
 
 	if (unpriv)
 		set_admin(true);
 	err = bpf_prog_test_run(fd_prog, 1, data, size_data,
 				tmp, &size_tmp, &retval, NULL);
+	saved_errno = errno;
+
 	if (unpriv)
 		set_admin(false);
-	if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
-		printf("Unexpected bpf_prog_test_run error ");
-		return err;
+
+	if (err) {
+		switch (saved_errno) {
+		case 524/*ENOTSUPP*/:
+			printf("Did not run the program (not supported) ");
+			return 0;
+		case EPERM:
+			if (unpriv) {
+				printf("Did not run the program (no permission) ");
+				return 0;
+			}
+			/* fallthrough; */
+		default:
+			printf("FAIL: Unexpected bpf_prog_test_run error (%s) ",
+				strerror(saved_errno));
+			return err;
+		}
 	}
-	if (!err && retval != expected_val &&
+
+	if (retval != expected_val &&
 	    expected_val != POINTER_VALUE) {
 		printf("FAIL retval %d != %d ", retval, expected_val);
 		return 1;
-- 
cgit v1.2.3


From 2195444e09b4fd3488a69e2f269a401dd4e4f512 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 2 Dec 2020 14:05:16 +0100
Subject: selftests: add selftest for the SRv6 End.DT4 behavior

this selftest is designed for evaluating the new SRv6 End.DT4 behavior
used, in this example, for implementing IPv4 L3 VPN use cases.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/srv6_end_dt4_l3vpn_test.sh       | 494 +++++++++++++++++++++
 1 file changed, 494 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..ad7a9fc59934
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	# destroy routers rt-* and hosts hs-*
+	for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+		ip netns del ${ns} || true
+	done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt=$1
+	local nsname=rt-${rt}
+
+	ip netns add ${nsname}
+	ip link set veth-rt-${rt} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hs=$1
+	local rt=$2
+	local tid=$3
+	local hsname=hs-t${tid}-${hs}
+	local rtname=rt-${rt}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns add ${hsname}
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	local hssrc_name=hs-t${tid}-${hssrc}
+	local hsdst_name=hs-t${tid}-${hsdst}
+	local rtsrc_name=rt-${rtsrc}
+	local rtdst_name=rt-${rtdst}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT4 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv4 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
-- 
cgit v1.2.3


From 2bc035538e167e28d900f2f51403458a05d7cc4a Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 2 Dec 2020 14:05:17 +0100
Subject: selftests: add selftest for the SRv6 End.DT6 (VRF) behavior

this selftest is designed for evaluating the new SRv6 End.DT6 (VRF) behavior
used, in this example, for implementing IPv6 L3 VPN use cases.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Paolo Lungaroni <paolo.lungaroni@cnit.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/srv6_end_dt6_l3vpn_test.sh       | 502 +++++++++++++++++++++
 1 file changed, 502 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
new file mode 100755
index 000000000000..68708f5e26a0
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -0,0 +1,502 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@cnit.it>
+
+# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
+# implementing IPv6 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT6 behavior and c) VRF.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::1/64 |  |                                   |  |  cafe::2/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | |  cafe::254/64 |    +----------+ |   | +----------+    |  cafe::254/64 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::3/64 |  |                                   |  |  cafe::4/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::2    |apply seg6 encap segs fc00:12:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::4    |apply seg6 encap segs fc00:12:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::1    |apply seg6 encap segs fc00:21:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::3    |apply seg6 encap segs fc00:21:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	# destroy routers rt-* and hosts hs-*
+	for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+		ip netns del ${ns} || true
+	done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt=$1
+	local nsname=rt-${rt}
+
+	ip netns add ${nsname}
+	ip link set veth-rt-${rt} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hs=$1
+	local rt=$2
+	local tid=$3
+	local hsname=hs-t${tid}-${hs}
+	local rtname=rt-${rt}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns add ${hsname}
+
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	local hssrc_name=hs-t${tid}-${hssrc}
+	local hsdst_name=hs-t${tid}-${hsdst}
+	local rtsrc_name=rt-${rtsrc}
+	local rtdst_name=rt-${rtdst}
+	local rtveth=veth-t${tid}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
+
+	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT6 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv6 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv6 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv6 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
-- 
cgit v1.2.3


From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:05 +0100
Subject: bpf: Add a bpf_sock_from_file helper

While eBPF programs can check whether a file is a socket by file->f_op
== &socket_file_ops, they cannot convert the void private_data pointer
to a struct socket BTF pointer. In order to do this a new helper
wrapping sock_from_file is added.

This is useful to tracing programs but also other program types
inheriting this set of helpers such as iterators or LSM programs.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com
---
 tools/include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1233f14f659f..30b477a26482 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *	Description
+ *		If the given file represents a socket, returns the associated
+ *		socket.
+ *	Return
+ *		A pointer to a struct socket on success or NULL if the file is
+ *		not a socket.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3986,6 +3994,7 @@ union bpf_attr {
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
+	FN(sock_from_file),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 593f6d41abbbc63e1ad297f7a36ab6060a812f0c Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:07 +0100
Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_delete

The eBPF program iterates over all entries (well, only one) of a socket
local storage map and deletes them all. The test makes sure that the
entry is indeed deleted.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-4-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 64 ++++++++++++++++++++++
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c    | 23 ++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 448885b95eed..bb4a638f2e6f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -20,6 +20,7 @@
 #include "bpf_iter_bpf_percpu_hash_map.skel.h"
 #include "bpf_iter_bpf_array_map.skel.h"
 #include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_helpers.skel.h"
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
 #include "bpf_iter_test_kern5.skel.h"
 #include "bpf_iter_test_kern6.skel.h"
@@ -913,6 +914,67 @@ out:
 	bpf_iter_bpf_percpu_array_map__destroy(skel);
 }
 
+/* An iterator program deletes all local storage in a map. */
+static void test_bpf_sk_storage_delete(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_sk_storage_helpers *skel;
+	union bpf_iter_link_info linfo;
+	int err, len, map_fd, iter_fd;
+	struct bpf_link *link;
+	int sock_fd = -1;
+	__u32 val = 42;
+	char buf[64];
+
+	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+		goto out;
+	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+	if (CHECK(err, "map_update", "map_update failed\n"))
+		goto out;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
+					&opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	if (sock_fd >= 0)
+		close(sock_fd);
+	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
 static void test_bpf_sk_storage_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1067,6 +1129,8 @@ void test_bpf_iter(void)
 		test_bpf_percpu_array_map();
 	if (test__start_subtest("bpf_sk_storage_map"))
 		test_bpf_sk_storage_map();
+	if (test__start_subtest("bpf_sk_storage_delete"))
+		test_bpf_sk_storage_delete();
 	if (test__start_subtest("rdonly-buf-out-of-bound"))
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
new file mode 100644
index 000000000000..01ff3235e413
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google LLC. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("iter/bpf_sk_storage_map")
+int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+	if (ctx->sk)
+		bpf_sk_storage_delete(&sk_stg_map, ctx->sk);
+
+	return 0;
+}
-- 
cgit v1.2.3


From bd9b327e58f98aa7126291bf12b50720c660e787 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:08 +0100
Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_get

The eBPF program iterates over all files and tasks. For all socket
files, it stores the tgid of the last task it encountered with a handle
to that socket. This is a heuristic for finding the "owner" of a socket
similar to what's done by lsof, ss, netstat or fuser. Potentially, this
information could be used from a cgroup_skb/*gress hook to try to
associate network traffic with processes.

The test makes sure that a socket it created is tagged with prog_tests's
pid.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-5-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 40 ++++++++++++++++++++++
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c    | 24 +++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index bb4a638f2e6f..9336d0f18331 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -975,6 +975,44 @@ out:
 	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
 }
 
+/* This creates a socket and its local storage. It then runs a task_iter BPF
+ * program that replaces the existing socket local storage with the tgid of the
+ * only task owning a file descriptor to this socket, this process, prog_tests.
+ */
+static void test_bpf_sk_storage_get(void)
+{
+	struct bpf_iter_bpf_sk_storage_helpers *skel;
+	int err, map_fd, val = -1;
+	int sock_fd = -1;
+
+	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+		goto out;
+
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+	if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+		goto close_socket;
+
+	do_dummy_read(skel->progs.fill_socket_owner);
+
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	      "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+	      getpid(), val, err);
+
+close_socket:
+	close(sock_fd);
+out:
+	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
 static void test_bpf_sk_storage_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1131,6 +1169,8 @@ void test_bpf_iter(void)
 		test_bpf_sk_storage_map();
 	if (test__start_subtest("bpf_sk_storage_delete"))
 		test_bpf_sk_storage_delete();
+	if (test__start_subtest("bpf_sk_storage_get"))
+		test_bpf_sk_storage_get();
 	if (test__start_subtest("rdonly-buf-out-of-bound"))
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
index 01ff3235e413..dde53df37de8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -21,3 +21,27 @@ int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
 
 	return 0;
 }
+
+SEC("iter/task_file")
+int fill_socket_owner(struct bpf_iter__task_file *ctx)
+{
+	struct task_struct *task = ctx->task;
+	struct file *file = ctx->file;
+	struct socket *sock;
+	int *sock_tgid;
+
+	if (!task || !file)
+		return 0;
+
+	sock = bpf_sock_from_file(file);
+	if (!sock)
+		return 0;
+
+	sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0);
+	if (!sock_tgid)
+		return 0;
+
+	*sock_tgid = task->tgid;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 34da87213d3ddd26643aa83deff7ffc6463da0fc Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:09 +0100
Subject: selftests/bpf: Test bpf_sk_storage_get in tcp iterators

This extends the existing bpf_sk_storage_get test where a socket is
created and tagged with its creator's pid by a task_file iterator.

A TCP iterator is now also used at the end of the test to negate the
values already stored in the local storage. The test therefore expects
-getpid() to be stored in the local storage.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-6-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c      | 18 ++++++++++++++++--
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c        | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 9336d0f18331..0e586368948d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -978,6 +978,8 @@ out:
 /* This creates a socket and its local storage. It then runs a task_iter BPF
  * program that replaces the existing socket local storage with the tgid of the
  * only task owning a file descriptor to this socket, this process, prog_tests.
+ * It then runs a tcp socket iterator that negates the value in the existing
+ * socket local storage, the test verifies that the resulting value is -pid.
  */
 static void test_bpf_sk_storage_get(void)
 {
@@ -994,6 +996,10 @@ static void test_bpf_sk_storage_get(void)
 	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
 		goto out;
 
+	err = listen(sock_fd, 1);
+	if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+		goto close_socket;
+
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
@@ -1003,9 +1009,17 @@ static void test_bpf_sk_storage_get(void)
 	do_dummy_read(skel->progs.fill_socket_owner);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
-	CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	    "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+	    getpid(), val, err))
+		goto close_socket;
+
+	do_dummy_read(skel->progs.negate_socket_local_storage);
+
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
 	      "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
-	      getpid(), val, err);
+	      -getpid(), val, err);
 
 close_socket:
 	close(sock_fd);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
index dde53df37de8..6cecab2b32ba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -45,3 +45,21 @@ int fill_socket_owner(struct bpf_iter__task_file *ctx)
 
 	return 0;
 }
+
+SEC("iter/tcp")
+int negate_socket_local_storage(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	int *sock_tgid;
+
+	if (!sk_common)
+		return 0;
+
+	sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0);
+	if (!sock_tgid)
+		return 0;
+
+	*sock_tgid = -*sock_tgid;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 41fdfffd5783db62bb9e00605eee14c69b9c0974 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 2 Dec 2020 15:35:43 +0100
Subject: selftests: forwarding: Add MPLS L2VPN test

Connect hosts H1 and H2 using two intermediate encapsulation routers
(LER1 and LER2). These routers encapsulate traffic from the hosts,
including the original Ethernet header, into MPLS.

Use ping to test reachability between H1 and H2.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/625f5c1aafa3a8085f8d3e082d680a82e16ffbaa.1606918980.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/Makefile    |   1 +
 tools/testing/selftests/net/forwarding/config      |   3 +
 .../selftests/net/forwarding/tc_mpls_l2vpn.sh      | 192 +++++++++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 250fbb2d1625..d97bd6889446 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -48,6 +48,7 @@ TEST_PROGS = bridge_igmp.sh \
 	tc_chains.sh \
 	tc_flower_router.sh \
 	tc_flower.sh \
+	tc_mpls_l2vpn.sh \
 	tc_shblocks.sh \
 	tc_vlan_modify.sh \
 	vxlan_asymmetric.sh \
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index da96eff72a8e..10e9a3321ae1 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,6 +6,9 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_VRF=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
new file mode 100755
index 000000000000..03743f04e178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+
+# | H1 (v$h1)             |
+# | 192.0.2.1/24          |
+# | 2001:db8::1/124       |
+# |                 + $h1 |
+# +-----------------|-----+
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER1            + $edge1                                  |
+# |                     -ingress:                             |
+# |                       -encapsulate Ethernet into MPLS     |
+# |                       -add outer Ethernet header          |
+# |                       -redirect to $mpls1 (egress)        |
+# |                                                           |
+# |                 + $mpls1                                  |
+# |                 |   -ingress:                             |
+# |                 |     -remove outer Ethernet header       |
+# |                 |     -remove MPLS header                 |
+# |                 |     -redirect to $edge1 (egress)        |
+# +-----------------|-----------------------------------------+
+#                   |
+#                   | (Ethernet over MPLS traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER2            + $mpls2                                  |
+# |                     -ingress:                             |
+# |                       -remove outer Ethernet header       |
+# |                       -remove MPLS header                 |
+# |                       -redirect to $edge2 (egress)        |
+# |                                                           |
+# |                 + $edge2                                  |
+# |                 |   -ingress:                             |
+# |                 |     -encapsulate Ethernet into MPLS     |
+# |                 |     -add outer Ethernet header          |
+# |                 |     -redirect to $mpls2 (egress)        |
+# +-----------------|-----------------------------------------|
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----+
+# | H2 (v$h2)       |     |
+# |                 + $h2 |
+# | 192.0.2.2/24          |
+# | 2001:db8::2/124       |
+# +-----------------------+
+#
+# LER1 and LER2 logically represent two different routers. However, no VRF is
+# created for them, as they don't do any IP routing.
+
+ALL_TESTS="mpls_forward_eth"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+ler1_create()
+{
+	tc qdisc add dev $edge1 ingress
+	tc filter add dev $edge1 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 102                           \
+	   action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \
+	   action mirred egress redirect dev $mpls1
+	ip link set dev $edge1 up
+
+	tc qdisc add dev $mpls1 ingress
+	tc filter add dev $mpls1 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 101                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge1
+	ip link set dev $mpls1 up
+}
+
+ler1_destroy()
+{
+	ip link set dev $mpls1 down
+	tc qdisc del dev $mpls1 ingress
+
+	ip link set dev $edge1 down
+	tc qdisc del dev $edge1 ingress
+}
+
+ler2_create()
+{
+	tc qdisc add dev $edge2 ingress
+	tc filter add dev $edge2 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 101                           \
+	   action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \
+	   action mirred egress redirect dev $mpls2
+	ip link set dev $edge2 up
+
+	tc qdisc add dev $mpls2 ingress
+	tc filter add dev $mpls2 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 102                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge2
+	ip link set dev $mpls2 up
+}
+
+ler2_destroy()
+{
+	ip link set dev $mpls2 down
+	tc qdisc del dev $mpls2 ingress
+
+	ip link set dev $edge2 down
+	tc qdisc del dev $edge2 ingress
+}
+
+mpls_forward_eth()
+{
+	ping_test $h1 192.0.2.2
+	ping6_test $h1 2001:db8::2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	edge1=${NETIFS[p2]}
+
+	mpls1=${NETIFS[p3]}
+	mpls2=${NETIFS[p4]}
+
+	edge2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	mpls1mac=$(mac_get $mpls1)
+	mpls2mac=$(mac_get $mpls2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	ler1_create
+	ler2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ler2_destroy
+	ler1_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 205704c618af0ab2366015d2281a3b0814d918a0 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Thu, 3 Dec 2020 22:06:04 -0500
Subject: vrf: packets with lladdr src needs dst at input with orig_iif when
 needs strict

Depending on the order of the routes to fe80::/64 are installed on the
VRF table, the NS for the source link-local address of the originator
might be sent to the wrong interface.

This patch ensures that packets with link-local addr source is doing a
lookup with the orig_iif when the destination addr indicates that it
is strict.

Add the reproducer as a use case in self test script fcnal-test.sh.

Fixes: b4869aa2f881 ("net: vrf: ipv6 support for local traffic to local addresses")
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20201204030604.18828-1-ssuryaextr@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/fcnal-test.sh | 95 +++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index fb5c55dd6df8..02b0b9ead40b 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -256,6 +256,28 @@ setup_cmd_nsb()
 	fi
 }
 
+setup_cmd_nsc()
+{
+	local cmd="$*"
+	local rc
+
+	run_cmd_nsc ${cmd}
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		# show user the command if not done so already
+		if [ "$VERBOSE" = "0" ]; then
+			echo "setup command: $cmd"
+		fi
+		echo "failed. stopping tests"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue"
+			read a
+		fi
+		exit $rc
+	fi
+}
+
 # set sysctl values in NS-A
 set_sysctl()
 {
@@ -471,6 +493,36 @@ setup()
 	sleep 1
 }
 
+setup_lla_only()
+{
+	# make sure we are starting with a clean slate
+	kill_procs
+	cleanup 2>/dev/null
+
+	log_debug "Configuring network namespaces"
+	set -e
+
+	create_ns ${NSA} "-" "-"
+	create_ns ${NSB} "-" "-"
+	create_ns ${NSC} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV} "-" "-" \
+		   ${NSB} ${NSB_DEV} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV2} "-" "-" \
+		   ${NSC} ${NSC_DEV}  "-" "-"
+
+	NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+	NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+	NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV})
+
+	create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-"
+	ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+	ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF}
+
+	set +e
+
+	sleep 1
+}
+
 ################################################################################
 # IPv4
 
@@ -3787,10 +3839,53 @@ use_case_br()
 	setup_cmd_nsb ip li del vlan100 2>/dev/null
 }
 
+# VRF only.
+# ns-A device is connected to both ns-B and ns-C on a single VRF but only has
+# LLA on the interfaces
+use_case_ping_lla_multi()
+{
+	setup_lla_only
+	# only want reply from ns-A
+	setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+	setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B"
+
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C"
+
+	# cycle/flap the first ns-A interface
+	setup_cmd ip link set ${NSA_DEV} down
+	setup_cmd ip link set ${NSA_DEV} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C"
+
+	# cycle/flap the second ns-A interface
+	setup_cmd ip link set ${NSA_DEV2} down
+	setup_cmd ip link set ${NSA_DEV2} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
+}
+
 use_cases()
 {
 	log_section "Use cases"
+	log_subsection "Device enslaved to bridge"
 	use_case_br
+	log_subsection "Ping LLA with multiple interfaces"
+	use_case_ping_lla_multi
 }
 
 ################################################################################
-- 
cgit v1.2.3


From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Dec 2020 13:50:37 +0900
Subject: x86/uprobes: Do not use prefixes.nbytes when looping over
 prefixes.bytes

Since insn.prefixes.nbytes can be bigger than the size of
insn.prefixes.bytes[] when a prefix is repeated, the proper check must
be

  insn.prefixes.bytes[i] != 0 and i < 4

instead of using insn.prefixes.nbytes.

Introduce a for_each_insn_prefix() macro for this purpose. Debugged by
Kees Cook <keescook@chromium.org>.

 [ bp: Massage commit message, sync with the respective header in tools/
   and drop "we". ]

Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2
---
 tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 568854b14d0a..52c6262e6bfd 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)	\
+	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
-- 
cgit v1.2.3


From d8cbe8bfa7df3c680ddfd5e1eee3a5c86d8dc764 Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Sat, 5 Dec 2020 22:15:02 -0800
Subject: tools/testing/selftests/vm: fix build error

Only x86 and PowerPC implement the pkey-xxx.h, and an error was reported
when compiling protection_keys.c.

Add a Arch judgment to compile "protection_keys" in the Makefile.

If other arch implement this, add the arch name to the Makefile.
eg:
    ifneq (,$(findstring $(ARCH),powerpc mips ... ))

Following build errors:

    pkey-helpers.h:93:2: error: #error Architecture not supported
     #error Architecture not supported
    pkey-helpers.h:96:20: error: `PKEY_DISABLE_ACCESS' undeclared
     #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
                        ^
    protection_keys.c:218:45: error: `PKEY_DISABLE_WRITE' undeclared
     pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
                                                ^

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Mina Almasry <almasrymina@google.com>
Link: https://lkml.kernel.org/r/1606826876-30656-1-git-send-email-suxingxing@loongson.cn
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 30873b19d04b..691893afc15d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -60,9 +60,13 @@ ifeq ($(CAN_BUILD_X86_64),1)
 TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
+
+ifneq (,$(findstring $(ARCH),powerpc))
 TEST_GEN_FILES += protection_keys
 endif
 
+endif
+
 ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
-- 
cgit v1.2.3


From 573a259336f8c57739bdaf035aa7abbae7d9a713 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Sat, 5 Dec 2020 22:15:05 -0800
Subject: userfaultfd: selftests: fix SIGSEGV if huge mmap fails

The error handling in hugetlb_allocate_area() was incorrect for the
hugetlb_shared test case.

Previously the behavior was:

- mmap a hugetlb area
  - If this fails, set the pointer to NULL, and carry on
- mmap an alias of the same hugetlb fd
  - If this fails, munmap the original area

If the original mmap failed, it's likely the second one did too.  If
both failed, we'd blindly try to munmap a NULL pointer, causing a
SIGSEGV.  Instead, "goto fail" so we return before trying to mmap the
alias.

This issue can be hit "in real life" by forgetting to set
/proc/sys/vm/nr_hugepages (leaving it at 0), and then trying to run the
hugetlb_shared test.

Another small improvement is, when the original mmap fails, don't just
print "it failed": perror(), so we can see *why*.  :)

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Joe Perches <joe@perches.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Alan Gilbert <dgilbert@redhat.com>
Link: https://lkml.kernel.org/r/20201204203443.2714693-1-axelrasmussen@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9b0912a01777..c4425597769a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -206,19 +206,19 @@ static int hugetlb_release_pages(char *rel_area)
 	return ret;
 }
 
-
 static void hugetlb_allocate_area(void **alloc_area)
 {
 	void *area_alias = NULL;
 	char **alloc_area_alias;
+
 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
 			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
 			   MAP_HUGETLB,
 			   huge_fd, *alloc_area == area_src ? 0 :
 			   nr_pages * page_size);
 	if (*alloc_area == MAP_FAILED) {
-		fprintf(stderr, "mmap of hugetlbfs file failed\n");
-		*alloc_area = NULL;
+		perror("mmap of hugetlbfs file failed");
+		goto fail;
 	}
 
 	if (map_shared) {
@@ -227,14 +227,11 @@ static void hugetlb_allocate_area(void **alloc_area)
 				  huge_fd, *alloc_area == area_src ? 0 :
 				  nr_pages * page_size);
 		if (area_alias == MAP_FAILED) {
-			if (munmap(*alloc_area, nr_pages * page_size) < 0) {
-				perror("hugetlb munmap");
-				exit(1);
-			}
-			*alloc_area = NULL;
-			return;
+			perror("mmap of hugetlb file alias failed");
+			goto fail_munmap;
 		}
 	}
+
 	if (*alloc_area == area_src) {
 		huge_fd_off0 = *alloc_area;
 		alloc_area_alias = &area_src_alias;
@@ -243,6 +240,16 @@ static void hugetlb_allocate_area(void **alloc_area)
 	}
 	if (area_alias)
 		*alloc_area_alias = area_alias;
+
+	return;
+
+fail_munmap:
+	if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+		perror("hugetlb munmap");
+		exit(1);
+	}
+fail:
+	*alloc_area = NULL;
 }
 
 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
-- 
cgit v1.2.3


From 23fb55526d80122710c28cb6be0e5dba65a6a3f1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 6 Dec 2020 10:22:22 +0200
Subject: selftests: mlxsw: Test RIF's reference count when joining a LAG

Test that the reference count of a router interface (RIF) configured for
a LAG is incremented / decremented when ports join / leave the LAG. Use
the offload indication on routes configured on the RIF to understand if
it was created / destroyed.

The test fails without the previous patch.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index a2eff5f58209..ed346da5d3cb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -22,6 +22,7 @@ ALL_TESTS="
 	duplicate_vlans_test
 	vlan_rif_refcount_test
 	subport_rif_refcount_test
+	subport_rif_lag_join_test
 	vlan_dev_deletion_test
 	lag_unlink_slaves_test
 	lag_dev_deletion_test
@@ -440,6 +441,48 @@ subport_rif_refcount_test()
 	ip link del dev bond1
 }
 
+subport_rif_lag_join_test()
+{
+	# Test that the reference count of a RIF configured for a LAG is
+	# incremented / decremented when ports join / leave the LAG. We use the
+	# offload indication on routes configured on the RIF to understand if
+	# it was created / destroyed
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link set dev bond1 up
+	ip -6 address add 2001:db8:1::1/64 dev bond1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif was not created on lag device"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after removing one port"
+
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+
+	log_test "subport rif lag join"
+
+	ip link del dev bond1
+}
+
 vlan_dev_deletion_test()
 {
 	# Test that VLAN devices are correctly deleted / unlinked when enslaved
-- 
cgit v1.2.3


From 6f39cecdb6018234a47dcea15121f01b9903d16b Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Wed, 25 Nov 2020 12:04:57 +0800
Subject: rseq/selftests: Fix MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ build error
 under other arch.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Except arch x86, the function rseq_offset_deref_addv is not defined.
The function test_membarrier_manager_thread call rseq_offset_deref_addv
produces a build error.

The RSEQ_ARCH_HAS_OFFSET_DEREF_ADD should contain all the code
for the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ.
If the other Arch implements this feature,
defined RSEQ_ARCH_HAS_OFFSET_DEREF_ADD in the header file
to ensure that this feature is available.

Following build errors:

param_test.c: In function ‘test_membarrier_worker_thread’:
param_test.c:1164:10: warning: implicit declaration of function ‘rseq_offset_deref_addv’
    ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
          ^~~~~~~~~~~~~~~~~~~~~~
/tmp/ccMj9yHJ.o: In function `test_membarrier_worker_thread':
param_test.c:1164: undefined reference to `rseq_offset_deref_addv'
param_test.c:1164: undefined reference to `rseq_offset_deref_addv'
collect2: error: ld returned 1 exit status
make: *** [/selftests/rseq/param_test_benchmark] Error 1

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/param_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 384589095864..699ad5f93c34 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1133,6 +1133,8 @@ static int set_signal_handler(void)
 	return ret;
 }
 
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
 struct test_membarrier_thread_args {
 	int stop;
 	intptr_t percpu_list_ptr;
@@ -1286,8 +1288,6 @@ void *test_membarrier_manager_thread(void *arg)
 	return NULL;
 }
 
-/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
-#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
 void test_membarrier(void)
 {
 	const int num_threads = opt_threads;
-- 
cgit v1.2.3


From 88f4ede44c585b24674dd99841040b2a1a856a76 Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Fri, 27 Nov 2020 11:16:57 +0800
Subject: selftests/clone3: Fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compiling the selftests with the -std=gnu99 option the build can
fail with.

Following build error:

  test_core.c: In function ‘test_cgcore_destroy’:
  test_core.c:87:2: error: ‘for’ loop initial declarations are only
  allowed in C99 mode
    for (int i = 0; i < 10; i++) {
    ^
  test_core.c:87:2: note: use option -std=c99 or -std=gnu99 to compile

Add -std=gnu99 to the clone3 selftest Makefile to fix this.

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/clone3/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index ef7564cb7abe..79b19a2863a0 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g -std=gnu99 -I../../../../usr/include/
 LDLIBS += -lcap
 
 TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \
-- 
cgit v1.2.3


From 07f262d80d5f1f426da4557066bf0ec24ee32d97 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 23 Nov 2020 16:56:05 -0800
Subject: tools/power/x86/intel-speed-select: Read TRL from mailbox

When SST-PP feature is not present, the TRL (Turbo Ratio Limits)
is read from MSRs. This is done as the mailbox command will fail
on Skylake-X based platform. But for IceLake servers, mailbox
commands can still be used. So add a check to allow for non Skylake
based platforms to read from mail box commands.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-core.c | 2 +-
 tools/power/x86/intel-speed-select/isst.h      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 1d7ecb54352e..8afd23407522 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -804,7 +804,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 				return ret;
 		}
 
-		if (!pkg_dev->enabled) {
+		if (!pkg_dev->enabled && is_skx_based_platform()) {
 			int freq;
 
 			freq = get_cpufreq_base_freq(cpu);
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 29715e9c2e06..60db0bb084d5 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -255,4 +255,5 @@ extern int is_clx_n_platform(void);
 extern int get_cpufreq_base_freq(int cpu);
 extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
 extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
+extern int is_skx_based_platform(void);
 #endif
-- 
cgit v1.2.3


From 6c4832253a2d2259fd4002e3c4511035f81f48f6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 3 Dec 2020 13:08:37 -0800
Subject: tools/power/x86/intel-speed-select: Account for missing sysfs for
 die_id

Some older kernels will not have support to get CPU die_id from the
sysfs. This requires several back ports. But the tool depends on getting
die_id to match to correct CPU.

Relax this restriction and use die_id as 0 when die_id is missing. This
is not a problem as we don't have any multi-die processors with Intel SST
support.

This helps in running this tool on older kernels with just Intel SST
drivers back ported.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index cd089a505859..271bc79bdc59 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -328,8 +328,12 @@ int get_physical_die_id(int cpu)
 		int core_id, pkg_id, die_id;
 
 		ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
-		if (!ret)
+		if (!ret) {
+			if (die_id < 0)
+				die_id = 0;
+
 			return die_id;
+		}
 	}
 
 	if (ret < 0)
-- 
cgit v1.2.3


From 5e27cb9bca6746d1c9c76c4b4aeececcb18b2120 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 3 Dec 2020 14:29:40 -0800
Subject: tools/power/x86/intel-speed-select: Update version for v5.11

Update version for changes released with v5.11 kernel release.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 271bc79bdc59..5390158cdb40 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.6";
+static const char *version_str = "v1.7";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
-- 
cgit v1.2.3


From 932c60558109a9131e54dacfda6070147fd1cdfb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 4 Dec 2020 15:20:01 -0800
Subject: tools/bpftool: Fix PID fetching with a lot of results

In case of having so many PID results that they don't fit into a singe page
(4096) bytes, bpftool will erroneously conclude that it got corrupted data due
to 4096 not being a multiple of struct pid_iter_entry, so the last entry will
be partially truncated. Fix this by sizing the buffer to fit exactly N entries
with no truncation in the middle of record.

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201204232002.3589803-1-andrii@kernel.org
---
 tools/bpf/bpftool/pids.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index df7d8ec76036..477e55d59c34 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level,
 
 int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 {
-	char buf[4096];
-	struct pid_iter_bpf *skel;
 	struct pid_iter_entry *e;
+	char buf[4096 / sizeof(*e) * sizeof(*e)];
+	struct pid_iter_bpf *skel;
 	int err, ret, fd = -1, i;
 	libbpf_print_fn_t default_print;
 
-- 
cgit v1.2.3


From 547f574fd9d5e3925d47fd44decbf6ab6df94b0e Mon Sep 17 00:00:00 2001
From: Mathieu Chouquet-Stringer <me@mathieu.digital>
Date: Wed, 2 Dec 2020 16:32:43 +0100
Subject: docs: Update documentation to reflect what TAINT_CPU_OUT_OF_SPEC
 means

Here's a patch updating the meaning of TAINT_CPU_OUT_OF_SPEC after
Borislav introduced changes in a7e1f67ed29f and upcoming patches in tip.

TAINT_CPU_OUT_OF_SPEC now means a bit more what it implies as the
flag isn't set just because of a CPU misconfiguration or mismatch.
Historically it was for SMP kernel oops on an officially SMP incapable
processor but now it also covers CPUs whose MSRs have been incorrectly
poked at from userspace, drivers being used on non supported
architectures, broken firmware, mismatched CPUs, ...

Update documentation and script to reflect that.

Signed-off-by: Mathieu Chouquet-Stringer <me@mathieu.digital>
Link: https://lore.kernel.org/r/20201202153244.709752-1-me@mathieu.digital
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 tools/debugging/kernel-chktaint | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 2240cb56e6e5..607b2b280945 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -72,7 +72,7 @@ if [ `expr $T % 2` -eq 0 ]; then
 	addout " "
 else
 	addout "S"
-	echo " * SMP kernel oops on an officially SMP incapable processor (#2)"
+	echo " * kernel running on an out of specification system (#2)"
 fi
 
 T=`expr $T / 2`
-- 
cgit v1.2.3


From 4cec85296c7c7a123890d9335b835f991b36e106 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 8 Dec 2020 11:22:52 +0200
Subject: selftests: forwarding: Add Q-in-VNI test

Add test to check Q-in-VNI traffic.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/q_in_vni.sh | 347 +++++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/q_in_vni.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh
new file mode 100755
index 000000000000..4c50c0234bce
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |  + $h1.10             |                          |  + $h2.10              |
+# |  | 192.0.2.1/28       |                          |  | 192.0.2.2/28        |
+# |  |                    |                          |  |                     |
+# |  | + $h1.20           |                          |  | + $h2.20            |
+# |  \ | 198.51.100.1/24  |                          |  \ | 198.51.100.2/24   |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1ad)            + $swp2           | |
+# | |    vid 100 pvid untagged                              vid 100 pvid    | |
+# | |                                                           untagged    | |
+# | |  + vx100 (vxlan)                                                      | |
+# | |    local 192.0.2.17                                                   | |
+# | |    remote 192.0.2.34 192.0.2.50                                       | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | |    vid 100 pvid untagged                                              | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  192.0.2.32/28 via 192.0.2.18                                             |
+# |  192.0.2.48/28 via 192.0.2.18                                             |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR2 (802.1ad) | |   | |                 BR2 (802.1ad) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx100 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 1000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |    vid 100 pvid untagged      | |
+# | |                               | |   | |                               | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  | vid 100 pvid untagged      | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |\                           | |   | |  |\                           | |
+# | |  | + w2.10                    | |   | |  | + w2.10                    | |
+# | |  |   192.0.2.3/28             | |   | |  |   192.0.2.4/28             | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.20                      | |   | |  + w2.20                      | |
+# | |    198.51.100.3/24            | |   | |    198.51.100.4/24            | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+	vlan_create $h1 20 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx100 type vxlan id 1000		\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 100 dev $swp2 pvid untagged
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge vlan del vid 100 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1=$1; shift
+	local host_addr2=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+	bridge vlan add vid 100 dev w1 pvid untagged
+
+	ip link add name vx100 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev vx100 up
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx100 master br2
+	tc qdisc add dev vx100 clsact
+
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1/28
+        vlan_create w2 20 vw2 $host_addr2/24
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \
+			     192.0.2.3 198.51.100.3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \
+			     192.0.2.4 198.51.100.4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2 ": local->local"
+	ping_test $h1 192.0.2.3 ": local->remote 1"
+	ping_test $h1 192.0.2.4 ": local->remote 2"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 477ce6d971159910fb8ae76755c8027aa6a84dde Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Tue, 8 Dec 2020 11:22:53 +0200
Subject: selftests: mlxsw: Add Q-in-VNI veto tests

Add tests to ensure that the forbidden and unsupported cases are indeed
vetoed by mlxsw driver.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh  | 77 ++++++++++++++++++++++
 .../drivers/net/mlxsw/spectrum/q_in_vni_veto.sh    | 66 +++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
new file mode 100755
index 000000000000..0231205a7147
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+	create_dot1d_and_dot1ad_vxlans
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_dot1d_and_dot1ad_vxlans()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev vx100 master br0
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
+	ip link set dev br1 up
+
+	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx200 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev vx200 master br1 2>/dev/null
+	check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
+
+	ip link set dev vx200 master br1 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
+
+	log_test "create 802.1d and 802.1ad VxLANs"
+
+	ip link del dev vx200
+	ip link del dev br1
+	ip link del dev vx100
+	ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
new file mode 100755
index 000000000000..f0443b1b05b9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+	create_vxlan_on_top_of_8021ad_bridge
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vxlan_on_top_of_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev vx100 master br0
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null
+	check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected"
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack"
+
+	log_test "create VxLAN on top of 802.1ad bridge"
+
+	ip link del dev vx100
+	ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 0b5b6e747c86e57b7ebd64ccb84314a227ccfcc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:38 +0100
Subject: selftests/bpf/test_offload.py: Remove check for program load flags
 match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we just removed the xdp_attachment_flags_ok() callback, also remove
the check for it in test_offload.py, and replace it with a test for the new
ambiguity-avoid check when multiple programs are loaded.

Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752225858.110217.13036901876869496246.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 43c9cda199b8..becd27b2f4ba 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -716,13 +716,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
     fail(ret == 0, "Replaced one of programs without -force")
     check_extack(err, "XDP program already attached.", args)
 
-    if modename == "" or modename == "drv":
-        othermode = "" if modename == "drv" else "drv"
-        start_test("Test multi-attachment XDP - detach...")
-        ret, _, err = sim.unset_xdp(othermode, force=True,
-                                    fail=False, include_stderr=True)
-        fail(ret == 0, "Removed program with a bad mode")
-        check_extack(err, "program loaded with different flags.", args)
+    start_test("Test multi-attachment XDP - remove without mode...")
+    ret, _, err = sim.unset_xdp("", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program without a mode flag")
+    check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
 
     sim.unset_xdp("offload")
     xdp = sim.ip_link_show(xdp=True)["xdp"]
@@ -1001,16 +999,6 @@ try:
     check_extack(err,
                  "native and generic XDP can't be active at the same time.",
                  args)
-    ret, _, err = sim.set_xdp(obj, "", force=True,
-                              fail=False, include_stderr=True)
-    fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack(err, "program loaded with different flags.", args)
-
-    start_test("Test XDP prog remove with bad flags...")
-    ret, _, err = sim.unset_xdp("", force=True,
-                                fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode")
-    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
-- 
cgit v1.2.3


From d8b5e76ae4e02908d000397597c6bc2868362fbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:40 +0100
Subject: selftests/bpf/test_offload.py: Only check verifier log on
 verification fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 6f8a57ccf851 ("bpf: Make verifier log more relevant by
default"), the verifier discards log messages for successfully-verified
programs. This broke test_offload.py which is looking for a verification
message from the driver callback. Change test_offload.py to use the toggle
in netdevsim to make the verification fail before looking for the
verification message.

Fixes: 6f8a57ccf851 ("bpf: Make verifier log more relevant by default")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226069.110217.12370824996153348073.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index becd27b2f4ba..61527b43f067 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -911,11 +911,18 @@ try:
 
     sim.tc_flush_filters()
 
+    start_test("Test TC offloads failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
     start_test("Test TC offloads work...")
     ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret != 0, "TC filter did not load with TC offloads enabled")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test TC offload basics...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
@@ -1032,6 +1039,15 @@ try:
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
+    start_test("Test XDP load failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                                 dev=sim['ifname'], fail=False, include_stderr=True)
+    fail(ret == 0, "verifier should fail on load")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+    sim.wait_for_flush()
+
     start_test("Test XDP offload...")
     _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
@@ -1039,7 +1055,6 @@ try:
     progs = bpftool_prog_list(expected=1)
     prog = progs[0]
     fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test XDP offload is device bound...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
-- 
cgit v1.2.3


From 852c2ee338f0ac6026458615b624e1c496142cf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:41 +0100
Subject: selftests/bpf/test_offload.py: Fix expected case of extack messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs
in net_device") changed the case of some of the extack messages being
returned when attaching of XDP programs failed. This broke test_offload.py,
so let's fix the test to reflect this.

Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226175.110217.11214100824416344952.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 61527b43f067..51a5e4d939cc 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1004,7 +1004,7 @@ try:
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
     check_extack(err,
-                 "native and generic XDP can't be active at the same time.",
+                 "Native and generic XDP can't be active at the same time.",
                  args)
 
     start_test("Test MTU restrictions...")
@@ -1035,7 +1035,7 @@ try:
     offload = bpf_pinned("/sys/fs/bpf/offload")
     ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
     fail(ret == 0, "attached offloaded XDP program to drv")
-    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args)
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
-- 
cgit v1.2.3


From 766e62b7fcd2cf1d43e6594ba37c659dc48f7ddb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:42 +0100
Subject: selftests/bpf/test_offload.py: Reset ethtool features after failed
 setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When setting the ethtool feature flag fails (as expected for the test), the
kernel now tracks that the feature was requested to be 'off' and refuses to
subsequently disable it again. So reset it back to 'on' so a subsequent
disable (that's not supposed to fail) can succeed.

Fixes: 417ec26477a5 ("selftests/bpf: add offload test based on netdevsim")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226280.110217.10696241563705667871.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 51a5e4d939cc..2128fbd8414b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -946,6 +946,7 @@ try:
     start_test("Test disabling TC offloads is rejected while filters installed...")
     ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
     fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+    sim.set_ethtool_tc_offloads(True)
 
     start_test("Test qdisc removal frees things...")
     sim.tc_flush_filters()
-- 
cgit v1.2.3


From 8158cad13435639cd4962fb88970960f880ef6d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:43 +0100
Subject: selftests/bpf/test_offload.py: Filter bpftool internal map when
 counting maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few of the tests in test_offload.py expects to see a certain number of
maps created, and checks this by counting the number of maps returned by
bpftool. There is already a filter that will remove any maps already there
at the beginning of the test, but bpftool now creates a map for the PID
iterator rodata on each invocation, which makes the map count wrong. Fix
this by also filtering the pid_iter.rodata map by name when counting.

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226387.110217.9887866138149423444.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 2128fbd8414b..b99bb8ed3ed4 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
     # Remove the base maps
-    for m in base_maps:
-        if m in maps:
-            maps.remove(m)
+    maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -770,6 +768,9 @@ ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
 base_progs = progs
 _, base_maps = bpftool("map")
+base_map_names = [
+    'pid_iter.rodata' # created on each bpftool invocation
+]
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
-- 
cgit v1.2.3


From a89052572ebbf4bcee7c39390640e92b60eaa653 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:29 +0000
Subject: selftests/bpf: Xsk selftests framework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds AF_XDP selftests framework under selftests/bpf.

Topology:
---------
     -----------           -----------
     |  xskX   | --------- |  xskY   |
     -----------     |     -----------
          |          |          |
     -----------     |     ----------
     |  vethX  | --------- |  vethY |
     -----------   peer    ----------
          |          |          |
     namespaceX      |     namespaceY

Prerequisites setup by script test_xsk.sh:

   Set up veth interfaces as per the topology shown ^^:
   * setup two veth interfaces and one namespace
   ** veth<xxxx> in root namespace
   ** veth<yyyy> in af_xdp<xxxx> namespace
   ** namespace af_xdp<xxxx>
   * create a spec file veth.spec that includes this run-time configuration
   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
       conflict with any existing interface
   * tests the veth and xsk layers of the topology

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-2-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/Makefile       |   4 +-
 tools/testing/selftests/bpf/test_xsk.sh    | 152 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/xsk_prereqs.sh | 119 ++++++++++++++++++++++
 3 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/bpf/test_xsk.sh
 create mode 100755 tools/testing/selftests/bpf/xsk_prereqs.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..6a1ddfe68f15 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -46,7 +46,8 @@ endif
 
 TEST_GEN_FILES =
 TEST_FILES = test_lwt_ip_encap.o \
-	test_tc_edt.o
+	test_tc_edt.o \
+	xsk_prereqs.sh
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -70,6 +71,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
+	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
new file mode 100755
index 000000000000..cae4c5574c4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation, Weqaar Janjua <weqaar.a.janjua@intel.com>
+
+# AF_XDP selftests based on veth
+#
+# End-to-end AF_XDP over Veth test
+#
+# Topology:
+# ---------
+#      -----------           -----------
+#      |  xskX   | --------- |  xskY   |
+#      -----------     |     -----------
+#           |          |          |
+#      -----------     |     ----------
+#      |  vethX  | --------- |  vethY |
+#      -----------   peer    ----------
+#           |          |          |
+#      namespaceX      |     namespaceY
+#
+# AF_XDP is an address family optimized for high performance packet processing,
+# it is XDP’s user-space interface.
+#
+# An AF_XDP socket is linked to a single UMEM which is a region of virtual
+# contiguous memory, divided into equal-sized frames.
+#
+# Refer to AF_XDP Kernel Documentation for detailed information:
+# https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+#
+# Prerequisites setup by script:
+#
+#   Set up veth interfaces as per the topology shown ^^:
+#   * setup two veth interfaces and one namespace
+#   ** veth<xxxx> in root namespace
+#   ** veth<yyyy> in af_xdp<xxxx> namespace
+#   ** namespace af_xdp<xxxx>
+#   * create a spec file veth.spec that includes this run-time configuration
+#   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
+#       conflict with any existing interface
+#   * tests the veth and xsk layers of the topology
+#
+# Kernel configuration:
+# ---------------------
+# See "config" file for recommended kernel config options.
+#
+# Turn on XDP sockets and veth support when compiling i.e.
+# 	Networking support -->
+# 		Networking options -->
+# 			[ * ] XDP sockets
+#
+# Executing Tests:
+# ----------------
+# Must run with CAP_NET_ADMIN capability.
+#
+# Run (full color-coded output):
+#   sudo ./test_xsk.sh -c
+#
+# If running from kselftests:
+#   sudo make colorconsole=1 run_tests
+#
+# Run (full output without color-coding):
+#   sudo ./test_xsk.sh
+
+. xsk_prereqs.sh
+
+while getopts c flag
+do
+	case "${flag}" in
+		c) colorconsole=1;;
+	esac
+done
+
+TEST_NAME="PREREQUISITES"
+
+URANDOM=/dev/urandom
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+
+VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH0=ve${VETH0_POSTFIX}
+VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH1=ve${VETH1_POSTFIX}
+NS0=root
+NS1=af_xdp${VETH1_POSTFIX}
+MTU=1500
+
+setup_vethPairs() {
+	echo "setting up ${VETH0}: namespace: ${NS0}"
+	ip netns add ${NS1}
+	ip link add ${VETH0} type veth peer name ${VETH1}
+	if [ -f /proc/net/if_inet6 ]; then
+		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+	fi
+	echo "setting up ${VETH1}: namespace: ${NS1}"
+	ip link set ${VETH1} netns ${NS1}
+	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
+	ip link set ${VETH0} mtu ${MTU}
+	ip netns exec ${NS1} ip link set ${VETH1} up
+	ip link set ${VETH0} up
+}
+
+validate_root_exec
+validate_veth_support ${VETH0}
+validate_ip_utility
+setup_vethPairs
+
+retval=$?
+if [ $retval -ne 0 ]; then
+	test_status $retval "${TEST_NAME}"
+	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	exit $retval
+fi
+
+echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
+
+validate_veth_spec_file
+
+echo "Spec file created: ${SPECFILE}"
+
+test_status $retval "${TEST_NAME}"
+
+## START TESTS
+
+statusList=()
+
+### TEST 1
+TEST_NAME="XSK KSELFTEST FRAMEWORK"
+
+echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+retval=$?
+if [ $retval -eq 0 ]; then
+	echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
+	vethXDPnative ${VETH0} ${VETH1} ${NS1}
+fi
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+## END TESTS
+
+cleanup_exit ${VETH0} ${VETH1} ${NS1}
+
+for _status in "${statusList[@]}"
+do
+	if [ $_status -ne 0 ]; then
+		test_exit $ksft_fail 0
+	fi
+done
+
+test_exit $ksft_pass 0
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
new file mode 100755
index 000000000000..29762739c21b
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation.
+
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
+ksft_xpass=3
+ksft_skip=4
+
+GREEN='\033[0;92m'
+YELLOW='\033[0;93m'
+RED='\033[0;31m'
+NC='\033[0m'
+STACK_LIM=131072
+SPECFILE=veth.spec
+
+validate_root_exec()
+{
+	msg="skip all tests:"
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		test_exit $ksft_fail 2
+	else
+		return $ksft_pass
+	fi
+}
+
+validate_veth_support()
+{
+	msg="skip all tests:"
+	if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
+		echo $msg veth kernel support not available >&2
+		test_exit $ksft_skip 1
+	else
+		ip link del $1
+		return $ksft_pass
+	fi
+}
+
+validate_veth_spec_file()
+{
+	if [ ! -f ${SPECFILE} ]; then
+		test_exit $ksft_skip 1
+	fi
+}
+
+test_status()
+{
+	statusval=$1
+	if [ -n "${colorconsole+set}" ]; then
+		if [ $statusval -eq 2 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
+		elif [ $statusval -eq 1 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
+		elif [ $statusval -eq 0 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
+		fi
+	else
+		if [ $statusval -eq 2 ]; then
+			echo -e "$2: [ FAIL ]"
+		elif [ $statusval -eq 1 ]; then
+			echo -e "$2: [ SKIPPED ]"
+		elif [ $statusval -eq 0 ]; then
+			echo -e "$2: [ PASS ]"
+		fi
+	fi
+}
+
+test_exit()
+{
+	retval=$1
+	if [ $2 -ne 0 ]; then
+		test_status $2 $(basename $0)
+	fi
+	exit $retval
+}
+
+clear_configs()
+{
+	if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
+		[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
+			{ echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
+		echo "removing ns $3"
+		ip netns del $3
+	fi
+	#Once we delete a veth pair node, the entire veth pair is removed,
+	#this is just to be cautious just incase the NS does not exist then
+	#veth node inside NS won't get removed so we explicitly remove it
+	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
+		{ echo "removing link $1"; ip link del $1; }
+	if [ -f ${SPECFILE} ]; then
+		echo "removing spec file:" ${SPECFILE}
+		rm -f ${SPECFILE}
+	fi
+}
+
+cleanup_exit()
+{
+	echo "cleaning up..."
+	clear_configs $1 $2 $3
+}
+
+validate_ip_utility()
+{
+	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+}
+
+vethXDPgeneric()
+{
+	ip link set dev $1 xdpdrv off
+	ip netns exec $3 ip link set dev $2 xdpdrv off
+}
+
+vethXDPnative()
+{
+	ip link set dev $1 xdpgeneric off
+	ip netns exec $3 ip link set dev $2 xdpgeneric off
+}
-- 
cgit v1.2.3


From facb7cb2e909ad2d21ebbfdc051726d4cd8f1d35 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:30 +0000
Subject: selftests/bpf: Xsk selftests - SKB POLL, NOPOLL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   Generic mode XDP is driver independent, used when the driver does
   not have support for XDP. Works on any netdevice using sockets and
   generic XDP path. XDP hook from netif_receive_skb().
   a. nopoll - soft-irq processing
   b. poll - using poll() syscall

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-3-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/Makefile       |   3 +-
 tools/testing/selftests/bpf/test_xsk.sh    |  39 +-
 tools/testing/selftests/bpf/xdpxceiver.c   | 979 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.h   | 153 +++++
 tools/testing/selftests/bpf/xsk_prereqs.sh |  16 +
 5 files changed, 1187 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/xdpxceiver.c
 create mode 100644 tools/testing/selftests/bpf/xdpxceiver.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6a1ddfe68f15..944ae17a39ed 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko
+	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
+	xdpxceiver
 
 TEST_CUSTOM_PROGS = urandom_read
 
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index cae4c5574c4c..0b7bafb65f43 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -8,8 +8,17 @@
 #
 # Topology:
 # ---------
-#      -----------           -----------
-#      |  xskX   | --------- |  xskY   |
+#                 -----------
+#               _ | Process | _
+#              /  -----------  \
+#             /        |        \
+#            /         |         \
+#      -----------     |     -----------
+#      | Thread1 |     |     | Thread2 |
+#      -----------     |     -----------
+#           |          |          |
+#      -----------     |     -----------
+#      |  xskX   |     |     |  xskY   |
 #      -----------     |     -----------
 #           |          |          |
 #      -----------     |     ----------
@@ -39,6 +48,8 @@
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
 #
+# See the source xdpxceiver.c for information on each test
+#
 # Kernel configuration:
 # ---------------------
 # See "config" file for recommended kernel config options.
@@ -138,6 +149,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 2
+TEST_NAME="SKB NOPOLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 3
+TEST_NAME="SKB POLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
new file mode 100644
index 000000000000..3f2a65b6a9f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -0,0 +1,979 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each thread is single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by sending packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * The following tests are run:
+ *
+ * 1. AF_XDP SKB mode
+ *    Generic mode XDP is driver independent, used when the driver does
+ *    not have support for XDP. Works on any netdevice using sockets and
+ *    generic XDP path. XDP hook from netif_receive_skb().
+ *    a. nopoll - soft-irq processing
+ *    b. poll - using poll() syscall
+ *
+ * Total tests: 2
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attach to a veth interface within their assigned
+ *   namespaces
+ * - Each thread Creates one AF_XDP socket connected to a unique umem for each
+ *   veth interface
+ * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all 10k packets were received and delivered in-order,
+ *   and have the right content
+ *
+ * Enable/disable debug mode:
+ * --------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <asm/barrier.h>
+typedef __u16 __sum16;
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include <bpf/xsk.h>
+#include "xdpxceiver.h"
+#include "../kselftest.h"
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+	ksft_test_result_fail
+	    ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+	ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+#define print_ksft_result(void)\
+	(ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+
+static void pthread_init_mutex(void)
+{
+	pthread_mutex_init(&sync_mutex, NULL);
+	pthread_mutex_init(&sync_mutex_tx, NULL);
+	pthread_cond_init(&signal_rx_condition, NULL);
+	pthread_cond_init(&signal_tx_condition, NULL);
+}
+
+static void pthread_destroy_mutex(void)
+{
+	pthread_mutex_destroy(&sync_mutex);
+	pthread_mutex_destroy(&sync_mutex_tx);
+	pthread_cond_destroy(&signal_rx_condition);
+	pthread_cond_destroy(&signal_tx_condition);
+}
+
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+	u32 *ptr = (u32 *)dest;
+	int i;
+
+	val = htonl(val);
+
+	for (i = 0; i < (size & (~0x3)); i += 4)
+		ptr[i >> 2] = val;
+
+	for (; i < size; i++)
+		((char *)dest)[i] = ((char *)&val)[i & 3];
+
+	return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+	/* add up 16-bit and 16-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16 csum_fold(__u32 csum)
+{
+	u32 sum = (__force u32)csum;
+
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __u16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+	unsigned long long s = (__force u32)sum;
+
+	s += (__force u32)saddr;
+	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__force __u32)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+{
+	u32 csum = 0;
+	u32 cnt = 0;
+
+	/* udp hdr and data */
+	for (; cnt < len; cnt += 2)
+		csum += udp_pkt[cnt >> 1];
+
+	return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr)
+{
+	memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN);
+	memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN);
+	eth_hdr->h_proto = htons(ETH_P_IP);
+}
+
+static void gen_ip_hdr(void *data, struct iphdr *ip_hdr)
+{
+	ip_hdr->version = IP_PKT_VER;
+	ip_hdr->ihl = 0x5;
+	ip_hdr->tos = IP_PKT_TOS;
+	ip_hdr->tot_len = htons(IP_PKT_SIZE);
+	ip_hdr->id = 0;
+	ip_hdr->frag_off = 0;
+	ip_hdr->ttl = IPDEFTTL;
+	ip_hdr->protocol = IPPROTO_UDP;
+	ip_hdr->saddr = ((struct ifobject *)data)->src_ip;
+	ip_hdr->daddr = ((struct ifobject *)data)->dst_ip;
+	ip_hdr->check = 0;
+}
+
+static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr)
+{
+	udp_hdr->source = htons(((struct ifobject *)arg)->src_port);
+	udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port);
+	udp_hdr->len = htons(UDP_PKT_SIZE);
+	memset32_htonl(pkt_data + PKT_HDR_SIZE,
+		       htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE);
+}
+
+static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+{
+	udp_hdr->check = 0;
+	udp_hdr->check =
+	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+{
+	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
+}
+
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+{
+	int ret;
+
+	data->umem = calloc(1, sizeof(struct xsk_umem_info));
+	if (!data->umem)
+		exit_with_error(errno);
+
+	ret = xsk_umem__create(&data->umem->umem, buffer, size,
+			       &data->umem->fq, &data->umem->cq, NULL);
+	if (ret)
+		exit_with_error(ret);
+
+	data->umem->buffer = buffer;
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
+{
+	int ret, i;
+	u32 idx;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
+	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		exit_with_error(ret);
+	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+}
+
+static int xsk_configure_socket(struct ifobject *ifobject)
+{
+	struct xsk_socket_config cfg;
+	struct xsk_ring_cons *rxr;
+	struct xsk_ring_prod *txr;
+	int ret;
+
+	ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
+	if (!ifobject->xsk)
+		exit_with_error(errno);
+
+	ifobject->xsk->umem = ifobject->umem;
+	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg.libbpf_flags = 0;
+	cfg.xdp_flags = opt_xdp_flags;
+	cfg.bind_flags = opt_xdp_bind_flags;
+
+	rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
+	txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+
+	ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
+				 opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
+
+	if (ret)
+		return 1;
+
+	return 0;
+}
+
+static struct option long_options[] = {
+	{"interface", required_argument, 0, 'i'},
+	{"queue", optional_argument, 0, 'q'},
+	{"poll", no_argument, 0, 'p'},
+	{"xdp-skb", no_argument, 0, 'S'},
+	{"copy", no_argument, 0, 'c'},
+	{"debug", optional_argument, 0, 'D'},
+	{"tx-pkt-count", optional_argument, 0, 'C'},
+	{0, 0, 0, 0}
+};
+
+static void usage(const char *prog)
+{
+	const char *str =
+	    "  Usage: %s [OPTIONS]\n"
+	    "  Options:\n"
+	    "  -i, --interface      Use interface\n"
+	    "  -q, --queue=n        Use queue n (default 0)\n"
+	    "  -p, --poll           Use poll syscall\n"
+	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
+	    "  -c, --copy           Force copy mode\n"
+	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
+	    "  -C, --tx-pkt-count=n Number of packets to send\n";
+	ksft_print_msg(str, prog);
+}
+
+static bool switch_namespace(int idx)
+{
+	char fqns[26] = "/var/run/netns/";
+	int nsfd;
+
+	strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+	nsfd = open(fqns, O_RDONLY);
+
+	if (nsfd == -1)
+		exit_with_error(errno);
+
+	if (setns(nsfd, 0) == -1)
+		exit_with_error(errno);
+
+	return true;
+}
+
+static void *nsswitchthread(void *args)
+{
+	if (switch_namespace(((struct targs *)args)->idx)) {
+		ifdict[((struct targs *)args)->idx]->ifindex =
+		    if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname);
+		if (!ifdict[((struct targs *)args)->idx]->ifindex) {
+			ksft_test_result_fail
+			    ("ERROR: [%s] interface \"%s\" does not exist\n",
+			     __func__, ifdict[((struct targs *)args)->idx]->ifname);
+			((struct targs *)args)->retptr = false;
+		} else {
+			ksft_print_msg("Interface found: %s\n",
+				       ifdict[((struct targs *)args)->idx]->ifname);
+			((struct targs *)args)->retptr = true;
+		}
+	} else {
+		((struct targs *)args)->retptr = false;
+	}
+	pthread_exit(NULL);
+}
+
+static int validate_interfaces(void)
+{
+	bool ret = true;
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		if (!strcmp(ifdict[i]->ifname, "")) {
+			ret = false;
+			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
+		}
+		if (strcmp(ifdict[i]->nsname, "")) {
+			struct targs *targs;
+
+			targs = (struct targs *)malloc(sizeof(struct targs));
+			if (!targs)
+				exit_with_error(errno);
+
+			targs->idx = i;
+			if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs))
+				exit_with_error(errno);
+
+			pthread_join(ns_thread, NULL);
+
+			if (targs->retptr)
+				ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
+
+			free(targs);
+		} else {
+			ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
+			if (!ifdict[i]->ifindex) {
+				ksft_test_result_fail
+				    ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
+				ret = false;
+			} else {
+				ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
+			}
+		}
+	}
+	return ret;
+}
+
+static void parse_command_line(int argc, char **argv)
+{
+	int option_index, interface_index = 0, c;
+
+	opterr = 0;
+
+	for (;;) {
+		c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'i':
+			if (interface_index == MAX_INTERFACES)
+				break;
+			char *sptr, *token;
+
+			sptr = strndupa(optarg, strlen(optarg));
+			memcpy(ifdict[interface_index]->ifname,
+			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+			token = strsep(&sptr, ",");
+			if (token)
+				memcpy(ifdict[interface_index]->nsname, token,
+				       MAX_INTERFACES_NAMESPACE_CHARS);
+			interface_index++;
+			break;
+		case 'q':
+			opt_queue = atoi(optarg);
+			break;
+		case 'p':
+			opt_poll = 1;
+			break;
+		case 'S':
+			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
+			opt_xdp_bind_flags |= XDP_COPY;
+			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
+			break;
+		case 'c':
+			opt_xdp_bind_flags |= XDP_COPY;
+			break;
+		case 'D':
+			debug_pkt_dump = 1;
+			break;
+		case 'C':
+			opt_pkt_count = atoi(optarg);
+			break;
+		default:
+			usage(basename(argv[0]));
+			ksft_exit_xfail();
+		}
+	}
+
+	if (!validate_interfaces()) {
+		usage(basename(argv[0]));
+		ksft_exit_xfail();
+	}
+}
+
+static void kick_tx(struct xsk_socket_info *xsk)
+{
+	int ret;
+
+	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
+		return;
+	exit_with_error(errno);
+}
+
+static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+{
+	unsigned int rcvd;
+	u32 idx;
+
+	if (!xsk->outstanding_tx)
+		return;
+
+	if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+		kick_tx(xsk);
+
+	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+	if (rcvd) {
+		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+		xsk->outstanding_tx -= rcvd;
+		xsk->tx_npkts += rcvd;
+	}
+}
+
+static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+{
+	unsigned int rcvd, i;
+	u32 idx_rx = 0, idx_fq = 0;
+	int ret;
+
+	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+	if (!rcvd) {
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret < 0)
+				exit_with_error(ret);
+		}
+		return;
+	}
+
+	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	while (ret != rcvd) {
+		if (ret < 0)
+			exit_with_error(ret);
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret < 0)
+				exit_with_error(ret);
+		}
+		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	}
+
+	for (i = 0; i < rcvd; i++) {
+		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+		(void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+		u64 orig = xsk_umem__extract_addr(addr);
+
+		addr = xsk_umem__add_offset_to_addr(addr);
+		pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
+		if (!pkt_node_rx)
+			exit_with_error(errno);
+
+		pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE);
+		if (!pkt_node_rx->pkt_frame)
+			exit_with_error(errno);
+
+		memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
+		       PKT_SIZE);
+
+		TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+
+		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+	}
+
+	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+	xsk_ring_cons__release(&xsk->rx, rcvd);
+	xsk->rx_npkts += rcvd;
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+{
+	u32 idx;
+	unsigned int i;
+
+	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
+		complete_tx_only(xsk, batch_size);
+
+	for (i = 0; i < batch_size; i++) {
+		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+		tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+		tx_desc->len = PKT_SIZE;
+	}
+
+	xsk_ring_prod__submit(&xsk->tx, batch_size);
+	xsk->outstanding_tx += batch_size;
+	*frameptr += batch_size;
+	*frameptr %= num_frames;
+	complete_tx_only(xsk, batch_size);
+}
+
+static inline int get_batch_size(int pkt_cnt)
+{
+	if (!opt_pkt_count)
+		return BATCH_SIZE;
+
+	if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
+		return BATCH_SIZE;
+
+	return opt_pkt_count - pkt_cnt;
+}
+
+static void complete_tx_only_all(void *arg)
+{
+	bool pending;
+
+	do {
+		pending = false;
+		if (((struct ifobject *)arg)->xsk->outstanding_tx) {
+			complete_tx_only(((struct ifobject *)
+					  arg)->xsk, BATCH_SIZE);
+			pending = !!((struct ifobject *)arg)->xsk->outstanding_tx;
+		}
+	} while (pending);
+}
+
+static void tx_only_all(void *arg)
+{
+	struct pollfd fds[MAX_SOCKS] = { };
+	u32 frame_nb = 0;
+	int pkt_cnt = 0;
+	int ret;
+
+	fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+	fds[0].events = POLLOUT;
+
+	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+		int batch_size = get_batch_size(pkt_cnt);
+
+		if (opt_poll) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret <= 0)
+				continue;
+
+			if (!(fds[0].revents & POLLOUT))
+				continue;
+		}
+
+		tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size);
+		pkt_cnt += batch_size;
+	}
+
+	if (opt_pkt_count)
+		complete_tx_only_all(arg);
+}
+
+static void worker_pkt_dump(void)
+{
+	struct in_addr ipaddr;
+
+	fprintf(stdout, "---------------------------------------\n");
+	for (int iter = 0; iter < num_frames - 1; iter++) {
+		/*extract L2 frame */
+		fprintf(stdout, "DEBUG>> L2: dst mac: ");
+		for (int i = 0; i < ETH_ALEN; i++)
+			fprintf(stdout, "%02X", ((struct ethhdr *)
+						 pkt_buf[iter]->payload)->h_dest[i]);
+
+		fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+		for (int i = 0; i < ETH_ALEN; i++)
+			fprintf(stdout, "%02X", ((struct ethhdr *)
+						 pkt_buf[iter]->payload)->h_source[i]);
+
+		/*extract L3 frame */
+		fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
+			((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
+
+		ipaddr.s_addr =
+		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
+
+		ipaddr.s_addr =
+		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
+
+		/*extract L4 frame */
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
+			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+						 sizeof(struct ethhdr) +
+						 sizeof(struct iphdr)))->source));
+
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
+			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+						 sizeof(struct ethhdr) +
+						 sizeof(struct iphdr)))->dest));
+		/*extract L5 frame */
+		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+
+		if (payload == EOT) {
+			ksft_print_msg("End-of-tranmission frame received\n");
+			fprintf(stdout, "---------------------------------------\n");
+			break;
+		}
+		fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+		fprintf(stdout, "---------------------------------------\n");
+	}
+}
+
+static void worker_pkt_validate(void)
+{
+	u32 payloadseqnum = -2;
+
+	while (1) {
+		pkt_node_rx_q = malloc(sizeof(struct pkt));
+		pkt_node_rx_q = TAILQ_LAST(&head, head_s);
+		if (!pkt_node_rx_q)
+			break;
+		/*do not increment pktcounter if !(tos=0x9 and ipv4) */
+		if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->version == IP_PKT_VER)
+		    && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos ==
+			IP_PKT_TOS)) {
+			payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
+			if (debug_pkt_dump && payloadseqnum != EOT) {
+				pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame));
+				pkt_obj->payload = (char *)malloc(PKT_SIZE);
+				memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
+				pkt_buf[payloadseqnum] = pkt_obj;
+			}
+
+			if (payloadseqnum == EOT) {
+				ksft_print_msg("End-of-tranmission frame received: PASS\n");
+				sigvar = 1;
+				break;
+			}
+
+			if (prev_pkt + 1 != payloadseqnum) {
+				ksft_test_result_fail
+				    ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
+				     __func__, prev_pkt, payloadseqnum);
+				ksft_exit_xfail();
+			}
+
+			TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+			free(pkt_node_rx_q->pkt_frame);
+			free(pkt_node_rx_q);
+			pkt_node_rx_q = NULL;
+			prev_pkt = payloadseqnum;
+			pkt_counter++;
+		} else {
+			ksft_print_msg("Invalid frame received: ");
+			ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n",
+				((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->version,
+				((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->tos);
+			TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+			free(pkt_node_rx_q->pkt_frame);
+			free(pkt_node_rx_q);
+			pkt_node_rx_q = NULL;
+		}
+	}
+}
+
+static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr,
+			      atomic_int *spinningptr)
+{
+	int ctr = 0;
+	int ret;
+
+	xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+	ret = xsk_configure_socket((struct ifobject *)arg);
+
+	/* Retry Create Socket if it fails as xsk_socket__create()
+	 * is asynchronous
+	 *
+	 * Essential to lock Mutex here to prevent Tx thread from
+	 * entering before Rx and causing a deadlock
+	 */
+	pthread_mutex_lock(mutexptr);
+	while (ret && ctr < SOCK_RECONF_CTR) {
+		atomic_store(spinningptr, 1);
+		xsk_configure_umem((struct ifobject *)arg,
+				   bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+		ret = xsk_configure_socket((struct ifobject *)arg);
+		usleep(USLEEP_MAX);
+		ctr++;
+	}
+	atomic_store(spinningptr, 0);
+	pthread_mutex_unlock(mutexptr);
+
+	if (ctr >= SOCK_RECONF_CTR)
+		exit_with_error(ret);
+}
+
+static void *worker_testapp_validate(void *arg)
+{
+	struct udphdr *udp_hdr =
+	    (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+	struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data));
+	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
+	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+	void *bufs;
+
+	pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (bufs == MAP_FAILED)
+		exit_with_error(errno);
+
+	if (strcmp(((struct ifobject *)arg)->nsname, ""))
+		switch_namespace(((struct ifobject *)arg)->ifdict_index);
+
+	if (((struct ifobject *)arg)->fv.vector == tx) {
+		int spinningrxctr = 0;
+
+		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
+
+		while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
+			spinningrxctr++;
+			usleep(USLEEP_MAX);
+		}
+
+		ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname);
+		for (int i = 0; i < num_frames; i++) {
+			/*send EOT frame */
+			if (i == (num_frames - 1))
+				data->seqnum = -1;
+			else
+				data->seqnum = i;
+			gen_udp_hdr((void *)data, (void *)arg, udp_hdr);
+			gen_ip_hdr((void *)arg, ip_hdr);
+			gen_udp_csum(udp_hdr, ip_hdr);
+			gen_eth_hdr((void *)arg, eth_hdr);
+			gen_eth_frame(((struct ifobject *)arg)->umem,
+				      i * XSK_UMEM__DEFAULT_FRAME_SIZE);
+		}
+
+		free(data);
+		ksft_print_msg("Sending %d packets on interface %s\n",
+			       (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname);
+		tx_only_all(arg);
+	} else if (((struct ifobject *)arg)->fv.vector == rx) {
+		struct pollfd fds[MAX_SOCKS] = { };
+		int ret;
+
+		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
+
+		ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname);
+		xsk_populate_fill_ring(((struct ifobject *)arg)->umem);
+
+		TAILQ_INIT(&head);
+		if (debug_pkt_dump) {
+			pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames);
+			if (!pkt_buf)
+				exit_with_error(errno);
+		}
+
+		fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+		fds[0].events = POLLIN;
+
+		pthread_mutex_lock(&sync_mutex);
+		pthread_cond_signal(&signal_rx_condition);
+		pthread_mutex_unlock(&sync_mutex);
+
+		while (1) {
+			if (opt_poll) {
+				ret = poll(fds, 1, POLL_TMOUT);
+				if (ret <= 0)
+					continue;
+			}
+			rx_pkt(((struct ifobject *)arg)->xsk, fds);
+			worker_pkt_validate();
+
+			if (sigvar)
+				break;
+		}
+
+		ksft_print_msg("Received %d packets on interface %s\n",
+			       pkt_counter, ((struct ifobject *)arg)->ifname);
+	}
+
+	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
+	(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	pthread_exit(NULL);
+}
+
+static void testapp_validate(void)
+{
+	pthread_attr_init(&attr);
+	pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+	pthread_mutex_lock(&sync_mutex);
+
+	/*Spawn RX thread */
+	if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
+		exit_with_error(errno);
+
+	struct timespec max_wait = { 0, 0 };
+
+	if (clock_gettime(CLOCK_REALTIME, &max_wait))
+		exit_with_error(errno);
+	max_wait.tv_sec += TMOUT_SEC;
+
+	if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
+		exit_with_error(errno);
+
+	pthread_mutex_unlock(&sync_mutex);
+
+	/*Spawn TX thread */
+	if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
+		exit_with_error(errno);
+
+	pthread_join(t1, NULL);
+	pthread_join(t0, NULL);
+
+	if (debug_pkt_dump) {
+		worker_pkt_dump();
+		for (int iter = 0; iter < num_frames - 1; iter++) {
+			free(pkt_buf[iter]->payload);
+			free(pkt_buf[iter]);
+		}
+		free(pkt_buf);
+	}
+
+	print_ksft_result();
+}
+
+static void init_iface_config(void *ifaceconfig)
+{
+	/*Init interface0 */
+	ifdict[0]->fv.vector = tx;
+	memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+	memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+	ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+	ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+	ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+	ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+
+	/*Init interface1 */
+	ifdict[1]->fv.vector = rx;
+	memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+	memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+	ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+	ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+	ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+	ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+
+	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
+		exit_with_error(errno);
+
+	const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+	const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+	const char *IP1 = "192.168.100.162";
+	const char *IP2 = "192.168.100.161";
+	u16 UDP_DST_PORT = 2020;
+	u16 UDP_SRC_PORT = 2121;
+
+	ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj));
+	memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
+	memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
+	inet_aton(IP1, &ifaceconfig->dst_ip);
+	inet_aton(IP2, &ifaceconfig->src_ip);
+	ifaceconfig->dst_port = UDP_DST_PORT;
+	ifaceconfig->src_port = UDP_SRC_PORT;
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject));
+		if (!ifdict[i])
+			exit_with_error(errno);
+
+		ifdict[i]->ifdict_index = i;
+	}
+
+	setlocale(LC_ALL, "");
+
+	parse_command_line(argc, argv);
+
+	num_frames = ++opt_pkt_count;
+
+	init_iface_config((void *)ifaceconfig);
+
+	pthread_init_mutex();
+
+	ksft_set_plan(1);
+
+	testapp_validate();
+
+	for (int i = 0; i < MAX_INTERFACES; i++)
+		free(ifdict[i]);
+
+	pthread_destroy_mutex();
+
+	ksft_exit_pass();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
new file mode 100644
index 000000000000..5929f2fc1224
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XDPXCEIVER_H_
+#define XDPXCEIVER_H_
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#define MAX_INTERFACES 2
+#define MAX_INTERFACE_NAME_CHARS 7
+#define MAX_INTERFACES_NAMESPACE_CHARS 10
+#define MAX_SOCKS 1
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+			sizeof(struct udphdr))
+#define MIN_PKT_SIZE 64
+#define ETH_FCS_SIZE 4
+#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_VER 0x4
+#define IP_PKT_TOS 0x9
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define TMOUT_SEC (3)
+#define EOT (-1)
+#define USLEEP_MAX 200000
+#define THREAD_STACK 60000000
+#define SOCK_RECONF_CTR 10
+#define BATCH_SIZE 64
+#define POLL_TMOUT 1000
+#define NEED_WAKEUP 1
+
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+enum TESTS {
+	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
+};
+
+u8 uut;
+u8 debug_pkt_dump;
+u32 num_frames;
+
+static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int opt_queue;
+static int opt_pkt_count;
+static int opt_poll;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+static u32 pkt_counter;
+static u32 prev_pkt = -1;
+static int sigvar;
+
+struct xsk_umem_info {
+	struct xsk_ring_prod fq;
+	struct xsk_ring_cons cq;
+	struct xsk_umem *umem;
+	void *buffer;
+};
+
+struct xsk_socket_info {
+	struct xsk_ring_cons rx;
+	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
+	struct xsk_socket *xsk;
+	unsigned long rx_npkts;
+	unsigned long tx_npkts;
+	unsigned long prev_rx_npkts;
+	unsigned long prev_tx_npkts;
+	u32 outstanding_tx;
+};
+
+struct flow_vector {
+	enum fvector {
+		tx,
+		rx,
+		bidi,
+		undef,
+	} vector;
+};
+
+struct generic_data {
+	u32 seqnum;
+};
+
+struct ifaceconfigobj {
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	struct in_addr dst_ip;
+	struct in_addr src_ip;
+	u16 src_port;
+	u16 dst_port;
+} *ifaceconfig;
+
+struct ifobject {
+	int ifindex;
+	int ifdict_index;
+	char ifname[MAX_INTERFACE_NAME_CHARS];
+	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
+	struct flow_vector fv;
+	struct xsk_socket_info *xsk;
+	struct xsk_umem_info *umem;
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	u32 dst_ip;
+	u32 src_ip;
+	u16 src_port;
+	u16 dst_port;
+};
+
+static struct ifobject *ifdict[MAX_INTERFACES];
+
+/*threads*/
+atomic_int spinning_tx;
+atomic_int spinning_rx;
+pthread_mutex_t sync_mutex;
+pthread_mutex_t sync_mutex_tx;
+pthread_cond_t signal_rx_condition;
+pthread_cond_t signal_tx_condition;
+pthread_t t0, t1, ns_thread;
+pthread_attr_t attr;
+
+struct targs {
+	bool retptr;
+	int idx;
+};
+
+TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
+struct head_s *head_p;
+struct pkt {
+	char *pkt_frame;
+
+	TAILQ_ENTRY(pkt) pkt_nodes;
+} *pkt_node_rx, *pkt_node_rx_q;
+
+struct pkt_frame {
+	char *payload;
+} *pkt_obj;
+
+struct pkt_frame **pkt_buf;
+
+#endif				/* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 29762739c21b..9d54c4645127 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -14,6 +14,8 @@ RED='\033[0;31m'
 NC='\033[0m'
 STACK_LIM=131072
 SPECFILE=veth.spec
+XSKOBJ=xdpxceiver
+NUMPKTS=10000
 
 validate_root_exec()
 {
@@ -117,3 +119,17 @@ vethXDPnative()
 	ip link set dev $1 xdpgeneric off
 	ip netns exec $3 ip link set dev $2 xdpgeneric off
 }
+
+execxdpxceiver()
+{
+	local -a 'paramkeys=("${!'"$1"'[@]}")' copy
+	paramkeysstr=${paramkeys[*]}
+
+	for index in $paramkeysstr;
+		do
+			current=$1"[$index]"
+			copy[$index]=${!current}
+		done
+
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+}
-- 
cgit v1.2.3


From 9103a8594d9324d8e1512442ba580e4e91d42c2d Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:31 +0000
Subject: selftests/bpf: Xsk selftests - DRV POLL, NOPOLL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

2. AF_XDP DRV/Native mode
   Works on any netdevice with XDP_REDIRECT support, driver dependent.
   Processes packets before SKB allocation. Provides better performance
   than SKB. Driver hook available just after DMA of buffer descriptor.
   a. nopoll
   b. poll
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-4-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  | 24 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 22 +++++++++++++++++++---
 tools/testing/selftests/bpf/xdpxceiver.h |  3 ++-
 3 files changed, 45 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 0b7bafb65f43..aad8065637fd 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -173,6 +173,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 4
+TEST_NAME="DRV NOPOLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 5
+TEST_NAME="DRV POLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 3f2a65b6a9f5..9fcd80a38b07 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -27,7 +27,16 @@
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
  *
- * Total tests: 2
+ * 2. AF_XDP DRV/Native mode
+ *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
+ *    packets before SKB allocation. Provides better performance than SKB. Driver
+ *    hook available just after DMA of buffer descriptor.
+ *    a. nopoll
+ *    b. poll
+ *    - Only copy mode is supported because veth does not currently support
+ *      zero-copy mode
+ *
+ * Total tests: 4
  *
  * Flow:
  * -----
@@ -88,7 +97,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+	(ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
 
 static void pthread_init_mutex(void)
 {
@@ -311,6 +320,7 @@ static struct option long_options[] = {
 	{"queue", optional_argument, 0, 'q'},
 	{"poll", no_argument, 0, 'p'},
 	{"xdp-skb", no_argument, 0, 'S'},
+	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
@@ -326,6 +336,7 @@ static void usage(const char *prog)
 	    "  -q, --queue=n        Use queue n (default 0)\n"
 	    "  -p, --poll           Use poll syscall\n"
 	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
+	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
@@ -417,7 +428,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -448,6 +459,11 @@ static void parse_command_line(int argc, char **argv)
 			opt_xdp_bind_flags |= XDP_COPY;
 			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
 			break;
+		case 'N':
+			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+			opt_xdp_bind_flags |= XDP_COPY;
+			uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
+			break;
 		case 'c':
 			opt_xdp_bind_flags |= XDP_COPY;
 			break;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 5929f2fc1224..12070d66344b 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -38,7 +38,7 @@
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
-#define NEED_WAKEUP 1
+#define NEED_WAKEUP true
 
 typedef __u32 u32;
 typedef __u16 u16;
@@ -46,6 +46,7 @@ typedef __u8 u8;
 
 enum TESTS {
 	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
+	ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
 };
 
 u8 uut;
-- 
cgit v1.2.3


From 6674bf66560a6c55aada1e3cd4fca7a3ed204075 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:32 +0000
Subject: selftests/bpf: Xsk selftests - Socket Teardown - SKB, DRV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   c. Socket Teardown
      Create a Tx and a Rx socket, Tx from one socket, Rx on another.
      Destroy both sockets, then repeat multiple times. Only nopoll mode
      is used

2. AF_XDP DRV/Native mode
   c. Socket Teardown
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-5-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  | 24 ++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 35 ++++++++++++++++++++++++++++----
 tools/testing/selftests/bpf/xdpxceiver.h |  2 ++
 3 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index aad8065637fd..9be9dff25560 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -197,6 +197,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 6
+TEST_NAME="SKB SOCKET TEARDOWN"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 7
+TEST_NAME="DRV SOCKET TEARDOWN"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 9fcd80a38b07..e8907109782d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -26,6 +26,9 @@
  *    generic XDP path. XDP hook from netif_receive_skb().
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
+ *    c. Socket Teardown
+ *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ *       both sockets, then repeat multiple times. Only nopoll mode is used
  *
  * 2. AF_XDP DRV/Native mode
  *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
@@ -33,10 +36,11 @@
  *    hook available just after DMA of buffer descriptor.
  *    a. nopoll
  *    b. poll
+ *    c. Socket Teardown
  *    - Only copy mode is supported because veth does not currently support
  *      zero-copy mode
  *
- * Total tests: 4
+ * Total tests: 6
  *
  * Flow:
  * -----
@@ -97,7 +101,8 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+	(ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
+			       "NOPOLL", opt_teardown ? "Socket Teardown" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -322,6 +327,7 @@ static struct option long_options[] = {
 	{"xdp-skb", no_argument, 0, 'S'},
 	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
+	{"tear-down", no_argument, 0, 'T'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
@@ -338,6 +344,7 @@ static void usage(const char *prog)
 	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
 	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
+	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
@@ -428,7 +435,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -467,6 +474,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'c':
 			opt_xdp_bind_flags |= XDP_COPY;
 			break;
+		case 'T':
+			opt_teardown = 1;
+			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -871,6 +881,9 @@ static void *worker_testapp_validate(void *arg)
 
 		ksft_print_msg("Received %d packets on interface %s\n",
 			       pkt_counter, ((struct ifobject *)arg)->ifname);
+
+		if (opt_teardown)
+			ksft_print_msg("Destroying socket\n");
 	}
 
 	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
@@ -916,6 +929,20 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
+	if (!opt_teardown)
+		print_ksft_result();
+}
+
+static void testapp_sockets(void)
+{
+	for (int i = 0; i < MAX_TEARDOWN_ITER; i++) {
+		pkt_counter = 0;
+		prev_pkt = -1;
+		sigvar = 0;
+		ksft_print_msg("Creating socket\n");
+		testapp_validate();
+	}
+
 	print_ksft_result();
 }
 
@@ -982,7 +1009,7 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(1);
 
-	testapp_validate();
+	opt_teardown ? testapp_sockets() : testapp_validate();
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 12070d66344b..58185b914f99 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -21,6 +21,7 @@
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
+#define MAX_TEARDOWN_ITER 10
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -57,6 +58,7 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
 static int opt_poll;
+static int opt_teardown;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-- 
cgit v1.2.3


From 7d20441eb05ec6d8dc7b16381c53b3c0b3ad6e8a Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:33 +0000
Subject: selftests/bpf: Xsk selftests - Bi-directional Sockets - SKB, DRV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   d. Bi-directional Sockets
      Configure sockets as bi-directional tx/rx sockets, sets up fill
      and completion rings on each socket, tx/rx in both directions.
      Only nopoll mode is used

2. AF_XDP DRV/Native mode
   d. Bi-directional Sockets
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-6-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  |  24 ++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 100 +++++++++++++++++++++++--------
 tools/testing/selftests/bpf/xdpxceiver.h |   4 ++
 3 files changed, 104 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 9be9dff25560..88a7483eaae4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -221,6 +221,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 8
+TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 9
+TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index e8907109782d..014dedaa4dd2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -29,6 +29,10 @@
  *    c. Socket Teardown
  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
  *       both sockets, then repeat multiple times. Only nopoll mode is used
+ *    d. Bi-directional sockets
+ *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
+ *       completion rings on each socket, tx/rx in both directions. Only nopoll
+ *       mode is used
  *
  * 2. AF_XDP DRV/Native mode
  *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
@@ -37,10 +41,11 @@
  *    a. nopoll
  *    b. poll
  *    c. Socket Teardown
+ *    d. Bi-directional sockets
  *    - Only copy mode is supported because veth does not currently support
  *      zero-copy mode
  *
- * Total tests: 6
+ * Total tests: 8
  *
  * Flow:
  * -----
@@ -101,8 +106,9 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
-			       "NOPOLL", opt_teardown ? "Socket Teardown" : ""))
+	(ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
+			       "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
+			       opt_bidi ? "Bi-directional Sockets" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -308,8 +314,13 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 	cfg.xdp_flags = opt_xdp_flags;
 	cfg.bind_flags = opt_xdp_bind_flags;
 
-	rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
-	txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+	if (!opt_bidi) {
+		rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
+		txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+	} else {
+		rxr = &ifobject->xsk->rx;
+		txr = &ifobject->xsk->tx;
+	}
 
 	ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
 				 opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
@@ -328,6 +339,7 @@ static struct option long_options[] = {
 	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
 	{"tear-down", no_argument, 0, 'T'},
+	{"bidi", optional_argument, 0, 'B'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
@@ -345,6 +357,7 @@ static void usage(const char *prog)
 	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
 	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
+	    "  -B, --bidi           Bi-directional sockets test\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
@@ -435,7 +448,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -477,6 +490,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'T':
 			opt_teardown = 1;
 			break;
+		case 'B':
+			opt_bidi = 1;
+			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -802,22 +818,25 @@ static void *worker_testapp_validate(void *arg)
 	struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data));
 	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
 	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
-	void *bufs;
+	void *bufs = NULL;
 
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
-		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (bufs == MAP_FAILED)
-		exit_with_error(errno);
+	if (!bidi_pass) {
+		bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+			    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (bufs == MAP_FAILED)
+			exit_with_error(errno);
 
-	if (strcmp(((struct ifobject *)arg)->nsname, ""))
-		switch_namespace(((struct ifobject *)arg)->ifdict_index);
+		if (strcmp(((struct ifobject *)arg)->nsname, ""))
+			switch_namespace(((struct ifobject *)arg)->ifdict_index);
+	}
 
 	if (((struct ifobject *)arg)->fv.vector == tx) {
 		int spinningrxctr = 0;
 
-		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
+		if (!bidi_pass)
+			thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
 
 		while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
 			spinningrxctr++;
@@ -847,7 +866,8 @@ static void *worker_testapp_validate(void *arg)
 		struct pollfd fds[MAX_SOCKS] = { };
 		int ret;
 
-		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
+		if (!bidi_pass)
+			thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
 
 		ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname);
 		xsk_populate_fill_ring(((struct ifobject *)arg)->umem);
@@ -886,8 +906,10 @@ static void *worker_testapp_validate(void *arg)
 			ksft_print_msg("Destroying socket\n");
 	}
 
-	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
-	(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	if (!opt_bidi || (opt_bidi && bidi_pass)) {
+		xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
+		(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	}
 	pthread_exit(NULL);
 }
 
@@ -896,11 +918,26 @@ static void testapp_validate(void)
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
+	if (opt_bidi && bidi_pass) {
+		pthread_init_mutex();
+		if (!switching_notify) {
+			ksft_print_msg("Switching Tx/Rx vectors\n");
+			switching_notify++;
+		}
+	}
+
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
-		exit_with_error(errno);
+	if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+		if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
+			exit_with_error(errno);
+	} else if (opt_bidi && bidi_pass) {
+		/*switch Tx/Rx vectors */
+		ifdict[0]->fv.vector = rx;
+		if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0]))
+			exit_with_error(errno);
+	}
 
 	struct timespec max_wait = { 0, 0 };
 
@@ -914,8 +951,15 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
-		exit_with_error(errno);
+	if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+		if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
+			exit_with_error(errno);
+	} else if (opt_bidi && bidi_pass) {
+		/*switch Tx/Rx vectors */
+		ifdict[1]->fv.vector = tx;
+		if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1]))
+			exit_with_error(errno);
+	}
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
@@ -929,18 +973,19 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!opt_teardown)
+	if (!opt_teardown && !opt_bidi)
 		print_ksft_result();
 }
 
 static void testapp_sockets(void)
 {
-	for (int i = 0; i < MAX_TEARDOWN_ITER; i++) {
+	for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
 		ksft_print_msg("Creating socket\n");
 		testapp_validate();
+		opt_bidi ? bidi_pass++ : bidi_pass;
 	}
 
 	print_ksft_result();
@@ -1009,7 +1054,14 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(1);
 
-	opt_teardown ? testapp_sockets() : testapp_validate();
+	if (!opt_teardown && !opt_bidi) {
+		testapp_validate();
+	} else if (opt_teardown && opt_bidi) {
+		ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
+		ksft_exit_xfail();
+	} else {
+		testapp_sockets();
+	}
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 58185b914f99..61f595b6f200 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -22,6 +22,7 @@
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
 #define MAX_TEARDOWN_ITER 10
+#define MAX_BIDI_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -53,12 +54,15 @@ enum TESTS {
 u8 uut;
 u8 debug_pkt_dump;
 u32 num_frames;
+u8 switching_notify;
+u8 bidi_pass;
 
 static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
 static int opt_poll;
 static int opt_teardown;
+static int opt_bidi;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-- 
cgit v1.2.3


From a5b7b1194a57bc59f289f3e4433a1be81cc3e19d Mon Sep 17 00:00:00 2001
From: Veronika Kabatova <vkabatov@redhat.com>
Date: Thu, 10 Dec 2020 13:01:34 +0100
Subject: selftests/bpf: Drop tcp-{client,server}.py from Makefile

The files don't exist anymore so this breaks generic kselftest builds
when using "make install" or "make gen_tar".

Fixes: 247f0ec361b7 ("selftests/bpf: Drop python client/server in favor of threads")
Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201210120134.2148482-1-vkabatov@redhat.com
---
 tools/testing/selftests/bpf/Makefile | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 944ae17a39ed..50b3495d7ddf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -75,8 +75,6 @@ TEST_PROGS := test_kmod.sh \
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
-	tcp_client.py \
-	tcp_server.py \
 	test_xdp_vlan.sh
 
 # Compile but not part of 'make run_tests'
-- 
cgit v1.2.3


From 7535a3526dfe78db02a08ca2fa6bf69f393105dd Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Thu, 10 Dec 2020 11:54:35 +0000
Subject: selftests/bpf: Xsk selftests - adding xdpxceiver to .gitignore

This patch adds *xdpxceiver* to selftests/bpf/.gitignore

Reported-by: Yonghong Song <yhs@fb.com>
Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201210115435.3995-1-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 752d8edddc66..f5b7ef93618c 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -36,3 +36,4 @@ test_cpp
 /runqslower
 /bench
 *.ko
+xdpxceiver
-- 
cgit v1.2.3


From 41003dd0241c2ceb2461a88a18ff461795f2af57 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 9 Dec 2020 15:29:12 +0100
Subject: selftests/bpf: Make selftest compilation work on clang 11

We can't compile test_core_reloc_module.c selftest with clang 11, compile
fails with:

  CLNG-LLC [test_maps] test_core_reloc_module.o
  progs/test_core_reloc_module.c:57:21: error: use of unknown builtin \
  '__builtin_preserve_type_info' [-Wimplicit-function-declaration]
   out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);

Skipping these tests if __builtin_preserve_type_info() is not supported
by compiler.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201209142912.99145-1-jolsa@kernel.org
---
 tools/testing/selftests/bpf/progs/test_core_reloc_module.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index 56363959f7b0..f59f175c7baf 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -40,6 +40,7 @@ int BPF_PROG(test_core_module_probed,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
+#if __has_builtin(__builtin_preserve_enum_value)
 	struct core_reloc_module_output *out = (void *)&data.out;
 	__u64 pid_tgid = bpf_get_current_pid_tgid();
 	__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -61,6 +62,9 @@ int BPF_PROG(test_core_module_probed,
 	out->len_exists = bpf_core_field_exists(read_ctx->len);
 
 	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+	data.skip = true;
+#endif
 
 	return 0;
 }
@@ -70,6 +74,7 @@ int BPF_PROG(test_core_module_direct,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
+#if __has_builtin(__builtin_preserve_enum_value)
 	struct core_reloc_module_output *out = (void *)&data.out;
 	__u64 pid_tgid = bpf_get_current_pid_tgid();
 	__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -91,6 +96,9 @@ int BPF_PROG(test_core_module_direct,
 	out->len_exists = bpf_core_field_exists(read_ctx->len);
 
 	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+	data.skip = true;
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 511a76bcb0ce242a19153658b25437906cc6070e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:52 +0100
Subject: selftests/bpf: Add test for signed 32-bit bound check bug

After a 32-bit load followed by a branch, the verifier would reduce the
maximum bound of the register to 0x7fffffff, allowing a user to bypass
bound checks. Ensure such a program is rejected.

In the second test, the 64-bit compare should not sufficient to
determine whether the signed 32-bit lower bound is 0, so the verifier
should reject the second branch.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/bounds.c | 41 +++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index dac40de3f868..57ed67b86074 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -703,3 +703,44 @@
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
+{
+	"bounds checks after 32-bit truncation. test 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+	/* This used to reduce the max bound to 0x7fffffff */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.errstr_unpriv = "R0 leaks addr",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+},
+{
+	"bounds checks after 32-bit truncation. test 2",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1),
+	BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.errstr_unpriv = "R0 leaks addr",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3


From 77ce220c0549dcc3db8226c61c60e83fc59dfafc Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:53 +0100
Subject: selftests/bpf: Fix array access with signed variable test

The test fails because of a recent fix to the verifier, even though this
program is valid. In details what happens is:

    7: (61) r1 = *(u32 *)(r0 +0)

Load a 32-bit value, with signed bounds [S32_MIN, S32_MAX]. The bounds
of the 64-bit value are [0, U32_MAX]...

    8: (65) if r1 s> 0xffffffff goto pc+1

... therefore this is always true (the operand is sign-extended).

    10: (b4) w2 = 11
    11: (6d) if r2 s> r1 goto pc+1

When true, the 64-bit bounds become [0, 10]. The 32-bit bounds are still
[S32_MIN, 10].

    13: (64) w1 <<= 2

Because this is a 32-bit operation, the verifier propagates the new
32-bit bounds to the 64-bit ones, and the knowledge gained from insn 11
is lost.

    14: (0f) r0 += r1
    15: (7a) *(u64 *)(r0 +0) = 4

Then the verifier considers r0 unbounded here, rejecting the test. To
make the test work, change insn 8 to check the sign of the 32-bit value.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/array_access.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 1c4b1939f5a8..bed53b561e04 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -68,7 +68,7 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
-	BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+	BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
 	BPF_MOV32_IMM(BPF_REG_1, 0),
 	BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
 	BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
-- 
cgit v1.2.3


From 3615bdf6d9b19db12b1589861609b4f1c6a8d303 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:54 +0100
Subject: selftests/bpf: Fix "dubious pointer arithmetic" test

The verifier trace changed following a bugfix. After checking the 64-bit
sign, only the upper bit mask is known, not bit 31. Update the test
accordingly.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/align.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 52414058a627..5861446d0777 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = {
 			 */
 			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 		}
 	},
 	{
-- 
cgit v1.2.3


From 38bf8cd821be292e7d8e6f6283d67c5d9708f887 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Dec 2020 12:21:13 +0100
Subject: selftests: fix poll error in udpgro.sh

The test program udpgso_bench_rx always invokes the poll()
syscall with a timeout of 10ms. If a larger timeout is specified
via the command line, udpgso_bench_rx is supposed to do multiple
poll() calls till the timeout is expired or an event is received.

Currently the poll() loop errors out after the first invocation with
no events, and may causes self-tests failure alike:

failed
 GRO with custom segment size            ./udpgso_bench_rx: poll: 0x0 expected 0x1

This change addresses the issue allowing the poll() loop to consume
all the configured timeout.

Fixes: ada641ff6ed3 ("selftests: fixes for UDP GRO")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench_rx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index db3d4a8b5a4c..76a24052f4b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms)
 				interrupted = true;
 				break;
 			}
+
+			/* no events and more time to wait, do poll again */
+			continue;
 		}
 		if (pfd.revents != POLLIN)
 			error(1, errno, "poll: 0x%x expected 0x%x\n",
-- 
cgit v1.2.3


From a67079b03165a17f9aceab3dd26b1638af68e0fc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 10 Dec 2020 17:59:46 -0800
Subject: selftests/bpf: fix bpf_testmod.ko recompilation logic

bpf_testmod.ko build rule declared dependency on VMLINUX_BTF, but the variable
itself was initialized after the rule was declared, which often caused
bpf_testmod.ko to not be re-compiled. Fix by moving VMLINUX_BTF determination
sooner.

Also enforce bpf_testmod.ko recompilation when we detect that vmlinux image
changed by removing bpf_testmod/bpf_testmod.ko. This is necessary to generate
correct module's split BTF. Without it, Kbuild's module build logic might
determine that nothing changed on the kernel side and thus bpf_testmod.ko
shouldn't be rebuilt, so won't re-generate module BTF, which often leads to
module's BTF with wrong string offsets against vmlinux BTF. Removing .ko file
forces Kbuild to re-build the module.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20201211015946.4062098-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 50b3495d7ddf..8b515a17f44b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -116,6 +116,13 @@ INCLUDE_DIR := $(SCRATCH_DIR)/include
 BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
 RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
 
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
+		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
+		     ../../../../vmlinux				\
+		     /sys/kernel/btf/vmlinux				\
+		     /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
 # to build individual tests.
 # NOTE: Semicolon at the end is critical to override lib.mk's default static
@@ -140,6 +147,7 @@ $(OUTPUT)/urandom_read: urandom_read.c
 
 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
 	$(call msg,MOD,,$@)
+	$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
 	$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
 	$(Q)cp bpf_testmod/bpf_testmod.ko $@
 
@@ -147,13 +155,6 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
 
-VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
-		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
-		     ../../../../vmlinux				\
-		     /sys/kernel/btf/vmlinux				\
-		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-
 DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
 
 $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
-- 
cgit v1.2.3


From 89ad7420b25c2b40a4d916f4fd43b9ccacd50500 Mon Sep 17 00:00:00 2001
From: Andrew Delgadillo <adelg@google.com>
Date: Fri, 11 Dec 2020 00:43:44 +0000
Subject: selftests/bpf: Drop the need for LLVM's llc

LLC is meant for compiler development and debugging. Consequently, it
exposes many low level options about its backend. To avoid future bugs
introduced by using the raw LLC tool, use clang directly so that all
appropriate options are passed to the back end.

Additionally, simplify the Makefile by removing the
CLANG_NATIVE_BPF_BUILD_RULE as it is not being use, stop passing
dwarfris attr since elfutils/libdw now supports the bpf backend (which
should work with any recent pahole), and stop passing alu32 since
-mcpu=v3 implies alu32.

Signed-off-by: Andrew Delgadillo <adelg@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201211004344.3355074-1-adelg@google.com
---
 tools/testing/selftests/bpf/Makefile | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8b515a17f44b..8c33e999319a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,6 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CLANG		?= clang
-LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
@@ -253,31 +252,19 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
 # $1 - input .c file
 # $2 - output .o file
 # $3 - CFLAGS
-# $4 - LDFLAGS
 define CLANG_BPF_BUILD_RULE
-	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+	$(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
 define CLANG_NOALU32_BPF_BUILD_RULE
-	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
-endef
-# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
-define CLANG_NATIVE_BPF_BUILD_RULE
 	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -emit-llvm					\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+	$(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2
 endef
 # Build BPF object using GCC
 define GCC_BPF_BUILD_RULE
 	$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
-	$(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+	$(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
 endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -332,8 +319,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
 		     $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
-					  $(TRUNNER_BPF_CFLAGS),	\
-					  $(TRUNNER_BPF_LDFLAGS))
+					  $(TRUNNER_BPF_CFLAGS))
 
 $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:			\
 		      $(TRUNNER_OUTPUT)/%.o				\
@@ -401,19 +387,16 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
 # Define test_progs BPF-GCC-flavored test runner.
 ifneq ($(BPF_GCC),)
 TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
 endif
 
@@ -424,7 +407,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c
 TRUNNER_EXTRA_FILES :=
 TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
 TRUNNER_BPF_CFLAGS :=
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_maps))
 
 # Define test_verifier test runner.
-- 
cgit v1.2.3


From b4fe9fec51ef48011f11c2da4099f0b530449c92 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Fri, 11 Dec 2020 01:07:11 +0000
Subject: selftests/bpf: Silence ima_setup.sh when not running in verbose mode.

Currently, ima_setup.sh spews outputs from commands like mkfs and dd
on the terminal without taking into account the verbosity level of
the test framework. Update test_progs to set the environment variable
SELFTESTS_VERBOSE=1 when a verbose output is requested. This
environment variable is then used by ima_setup.sh (and can be used by
other similar scripts) to obey the verbosity level of the test harness
without needing to re-implement command line options for verbosity.

In "silent" mode, the script saves the output to a temporary file, the
contents of which are echoed back to stderr when the script encounters
an error.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201211010711.3716917-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 24 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c | 10 ++++++++++
 2 files changed, 34 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 2bfc646bc230..8e62581113a3 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -7,6 +7,8 @@ set -o pipefail
 
 IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
 TEST_BINARY="/bin/true"
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
 
 usage()
 {
@@ -75,6 +77,19 @@ run()
 	exec "${copied_bin_path}"
 }
 
+catch()
+{
+	local exit_code="$1"
+	local log_file="$2"
+
+	if [[ "${exit_code}" -ne 0 ]]; then
+		cat "${log_file}" >&3
+	fi
+
+	rm -f "${log_file}"
+	exit ${exit_code}
+}
+
 main()
 {
 	[[ $# -ne 2 ]] && usage
@@ -96,4 +111,13 @@ main()
 	fi
 }
 
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+	# Save the stderr to 3 so that we can output back to
+	# it incase of an error.
+	exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
 main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5ef081bdae4e..7d077d48cadd 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -587,6 +587,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 				return -EINVAL;
 			}
 		}
+
+		if (env->verbosity > VERBOSE_NONE) {
+			if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
+				fprintf(stderr,
+					"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
+					errno);
+				return -1;
+			}
+		}
+
 		break;
 	case ARG_GET_TEST_CNT:
 		env->get_test_cnt = true;
-- 
cgit v1.2.3


From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 22:36:25 +0100
Subject: bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr()
 helpers

Remove bpf_ prefix, which causes these helpers to be reported in verifier
dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Lets
fix it as long as it is still possible before UAPI freezes on these helpers.

Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/include/uapi/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..556216dc9703 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3897,8 +3897,8 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
-	FN(bpf_per_cpu_ptr),            \
-	FN(bpf_this_cpu_ptr),		\
+	FN(per_cpu_ptr),		\
+	FN(this_cpu_ptr),		\
 	FN(redirect_peer),		\
 	/* */
 
-- 
cgit v1.2.3


From 111d0bda8eeb4b54e0c63897b071effbf9fd9251 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@de.ibm.com>
Date: Tue, 8 Dec 2020 22:08:29 +0100
Subject: tools/kvm_stat: Exempt time-based counters

The new counters halt_poll_success_ns and halt_poll_fail_ns do not count
events. Instead they provide a time, and mess up our statistics. Therefore,
we should exclude them.
Removal is currently implemented with an exempt list. If more counters like
these appear, we can think about a more general rule like excluding all
fields name "*_ns", in case that's a standing convention.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Tested-and-reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20201208210829.101324-1-raspl@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index d199a3694be8..b0bf56c5f120 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -742,7 +742,11 @@ class DebugfsProvider(Provider):
         The fields are all available KVM debugfs files
 
         """
-        return self.walkdir(PATH_DEBUGFS_KVM)[2]
+        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
+        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
+                  if field not in exempt_list]
+
+        return fields
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-- 
cgit v1.2.3


From fe62de310e2b563c0d303a09d06b020077fe86b4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 13:58:24 -0800
Subject: libbpf: Support modules in bpf_program__set_attach_target() API

Support finding kernel targets in kernel modules when using
bpf_program__set_attach_target() API. This brings it up to par with what
libbpf supports when doing declarative SEC()-based target determination.

Some minor internal refactoring was needed to make sure vmlinux BTF can be
loaded before bpf_object's load phase.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201211215825.3646154-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9be88a90a4aa..6ae748f6ea11 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2518,7 +2518,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
 	return 0;
 }
 
-static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 {
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 	    prog->type == BPF_PROG_TYPE_LSM)
@@ -2533,37 +2533,43 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
 	return false;
 }
 
-static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 {
-	bool need_vmlinux_btf = false;
 	struct bpf_program *prog;
-	int i, err;
+	int i;
 
 	/* CO-RE relocations need kernel BTF */
 	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
-		need_vmlinux_btf = true;
+		return true;
 
 	/* Support for typed ksyms needs kernel BTF */
 	for (i = 0; i < obj->nr_extern; i++) {
 		const struct extern_desc *ext;
 
 		ext = &obj->externs[i];
-		if (ext->type == EXT_KSYM && ext->ksym.type_id) {
-			need_vmlinux_btf = true;
-			break;
-		}
+		if (ext->type == EXT_KSYM && ext->ksym.type_id)
+			return true;
 	}
 
 	bpf_object__for_each_program(prog, obj) {
 		if (!prog->load)
 			continue;
-		if (libbpf_prog_needs_vmlinux_btf(prog)) {
-			need_vmlinux_btf = true;
-			break;
-		}
+		if (prog_needs_vmlinux_btf(prog))
+			return true;
 	}
 
-	if (!need_vmlinux_btf)
+	return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
+{
+	int err;
+
+	/* btf_vmlinux could be loaded earlier */
+	if (obj->btf_vmlinux)
+		return 0;
+
+	if (!force && !obj_needs_vmlinux_btf(obj))
 		return 0;
 
 	obj->btf_vmlinux = libbpf_find_kernel_btf();
@@ -7475,7 +7481,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	}
 
 	err = bpf_object__probe_loading(obj);
-	err = err ? : bpf_object__load_vmlinux_btf(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 	err = err ? : bpf_object__sanitize_maps(obj);
@@ -10870,23 +10876,33 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 				   int attach_prog_fd,
 				   const char *attach_func_name)
 {
-	int btf_id;
+	int btf_obj_fd = 0, btf_id = 0, err;
 
 	if (!prog || attach_prog_fd < 0 || !attach_func_name)
 		return -EINVAL;
 
-	if (attach_prog_fd)
+	if (prog->obj->loaded)
+		return -EINVAL;
+
+	if (attach_prog_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
 						 attach_prog_fd);
-	else
-		btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
-						    prog->expected_attach_type);
-
-	if (btf_id < 0)
-		return btf_id;
+		if (btf_id < 0)
+			return btf_id;
+	} else {
+		/* load btf_vmlinux, if not yet */
+		err = bpf_object__load_vmlinux_btf(prog->obj, true);
+		if (err)
+			return err;
+		err = find_kernel_btf_id(prog->obj, attach_func_name,
+					 prog->expected_attach_type,
+					 &btf_obj_fd, &btf_id);
+		if (err)
+			return err;
+	}
 
 	prog->attach_btf_id = btf_id;
-	prog->attach_btf_obj_fd = 0;
+	prog->attach_btf_obj_fd = btf_obj_fd;
 	prog->attach_prog_fd = attach_prog_fd;
 	return 0;
 }
-- 
cgit v1.2.3


From 2e33f831fccd2df83836a8e255755f85d364aaeb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 13:58:25 -0800
Subject: selftests/bpf: Add set_attach_target() API selftest for module target

Add test for bpf_program__set_attach_target() API, validating it can find
kernel module fentry target.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201211215825.3646154-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/module_attach.c | 11 ++++++++++-
 tools/testing/selftests/bpf/progs/test_module_attach.c | 11 +++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 4b65e9918764..50796b651f72 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -28,10 +28,18 @@ void test_module_attach(void)
 	struct test_module_attach__bss *bss;
 	int err;
 
-	skel = test_module_attach__open_and_load();
+	skel = test_module_attach__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
 		return;
 
+	err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual,
+					     0, "bpf_testmod_test_read");
+	ASSERT_OK(err, "set_attach_target");
+
+	err = test_module_attach__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton\n"))
+		return;
+
 	bss = skel->bss;
 
 	err = test_module_attach__attach(skel);
@@ -44,6 +52,7 @@ void test_module_attach(void)
 	ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
 	ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
 	ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+	ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
 	ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
 	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
 	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index b563563df172..efd1e287ac17 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -38,6 +38,17 @@ int BPF_PROG(handle_fentry,
 	return 0;
 }
 
+__u32 fentry_manual_read_sz = 0;
+
+SEC("fentry/placeholder")
+int BPF_PROG(handle_fentry_manual,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fentry_manual_read_sz = len;
+	return 0;
+}
+
 __u32 fexit_read_sz = 0;
 int fexit_ret = 0;
 
-- 
cgit v1.2.3


From a4d2a7ad86834092b327082004ead755d2603376 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Mon, 14 Dec 2020 11:38:12 +0000
Subject: libbpf: Expose libbpf ring_buffer epoll_fd

This provides a convenient perf ringbuf -> libbpf ringbuf migration
path for users of external polling systems. It is analogous to
perf_buffer__epoll_fd.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201214113812.305274-1-jackmanb@google.com
---
 tools/lib/bpf/libbpf.h   | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 tools/lib/bpf/ringbuf.c  | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6909ee81113a..3c35eb401931 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 				ring_buffer_sample_fn sample_cb, void *ctx);
 LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
 
 /* Perf buffer APIs */
 struct perf_buffer;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7c4126542e2b..1c0fd2dd233a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -346,6 +346,7 @@ LIBBPF_0.3.0 {
 		btf__parse_split;
 		btf__new_empty_split;
 		btf__new_split;
+		ring_buffer__epoll_fd;
 		xsk_setup_xdp_prog;
 		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..a8f997d47adf 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 	}
 	return cnt < 0 ? -errno : res;
 }
+
+/* Get an fd that can be used to sleep until data is available in the ring(s) */
+int ring_buffer__epoll_fd(const struct ring_buffer *rb)
+{
+	return rb->epoll_fd;
+}
-- 
cgit v1.2.3


From b4b638c36b7e7acd847b9c4b9c80f268e45ea30c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 9 Dec 2020 17:33:50 -0800
Subject: selftests/bpf: Add a test for ptr_to_map_value on stack for helper
 access

Change bpf_iter_task.c such that pointer to map_value may appear
on the stack for bpf_seq_printf() to access. Without previous
verifier patch, the bpf_iter test will fail.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201210013350.943985-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_task.c | 3 ++-
 tools/testing/selftests/bpf/verifier/unpriv.c     | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index 4983087852a0..b7f32c160f4e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -11,9 +11,10 @@ int dump_task(struct bpf_iter__task *ctx)
 {
 	struct seq_file *seq = ctx->meta->seq;
 	struct task_struct *task = ctx->task;
+	static char info[] = "    === END ===";
 
 	if (task == (void *)0) {
-		BPF_SEQ_PRINTF(seq, "    === END ===\n");
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
 		return 0;
 	}
 
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 91bb77c24a2e..a3fe0fbaed41 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -108,8 +108,9 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "invalid indirect read from stack off -8+0 size 8",
-	.result = REJECT,
+	.errstr_unpriv = "invalid indirect read from stack off -8+0 size 8",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
 },
 {
 	"unpriv: mangle pointer on stack 1",
-- 
cgit v1.2.3


From 6fe4ccdc3dabe3de573e27fb2684d925bd611458 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 10 Dec 2020 14:25:00 -0800
Subject: selftests: mptcp: add the flush addrs testcase

This patch added the flush addrs testcase. In do_transfer, if the number
of removing addresses is less than 8, use the del addr command to remove
the addresses one by one. If the number is more than 8, use the flush addrs
command to remove the addresses.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 50 ++++++++++++++++++-------
 1 file changed, 36 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 0eae628d1ffd..9aa9624cff97 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -264,27 +264,37 @@ do_transfer()
 	cpid=$!
 
 	if [ $rm_nr_ns1 -gt 0 ]; then
-		counter=1
-		sleep 1
+		if [ $rm_nr_ns1 -lt 8 ]; then
+			counter=1
+			sleep 1
 
-		while [ $counter -le $rm_nr_ns1 ]
-		do
-			ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+			while [ $counter -le $rm_nr_ns1 ]
+			do
+				ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+				sleep 1
+				let counter+=1
+			done
+		else
 			sleep 1
-			let counter+=1
-		done
+			ip netns exec ${listener_ns} ./pm_nl_ctl flush
+		fi
 	fi
 
 	if [ $rm_nr_ns2 -gt 0 ]; then
-		counter=1
-		sleep 1
+		if [ $rm_nr_ns2 -lt 8 ]; then
+			counter=1
+			sleep 1
 
-		while [ $counter -le $rm_nr_ns2 ]
-		do
-			ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+			while [ $counter -le $rm_nr_ns2 ]
+			do
+				ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+				sleep 1
+				let counter+=1
+			done
+		else
 			sleep 1
-			let counter+=1
-		done
+			ip netns exec ${connector_ns} ./pm_nl_ctl flush
+		fi
 	fi
 
 	wait $cpid
@@ -663,6 +673,18 @@ chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
 
+# subflows and signal, flush
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow
+chk_join_nr "flush subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
+
 # subflow IPv6
 reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-- 
cgit v1.2.3


From 0e12c0271887f1b00b79b7612c1d4f0d3d34e8a8 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Fri, 11 Dec 2020 12:24:20 +0800
Subject: selftests: test_vxlan_under_vrf: mute unnecessary error message

The cleanup function in this script that tries to delete hv-1 / hv-2
vm-1 / vm-2 netns will generate some uncessary error messages:

Cannot remove namespace file "/run/netns/hv-2": No such file or directory
Cannot remove namespace file "/run/netns/vm-1": No such file or directory
Cannot remove namespace file "/run/netns/vm-2": No such file or directory

Redirect it to /dev/null like other commands in the cleanup function
to reduce confusion.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Link: https://lore.kernel.org/r/20201211042420.16411-1-po-hsu.lin@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/test_vxlan_under_vrf.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 09f9ed92cbe4..534c8b7699ab 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -50,7 +50,7 @@ cleanup() {
     ip link del veth-tap 2>/dev/null || true
 
     for ns in hv-1 hv-2 vm-1 vm-2; do
-        ip netns del $ns || true
+        ip netns del $ns 2>/dev/null || true
     done
 }
 
-- 
cgit v1.2.3


From 9c84f229268fa229e250b7225611d0eb7094fea0 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:05 -0800
Subject: mm/gup_benchmark: rename to mm/gup_test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "selftests/vm: gup_test, hmm-tests, assorted improvements", v3.

Summary: This series provides two main things, and a number of smaller
supporting goodies.  The two main points are:

1) Add a new sub-test to gup_test, which in turn is a renamed version
   of gup_benchmark.  This sub-test allows nicer testing of dump_pages(),
   at least on user-space pages.

   For quite a while, I was doing a quick hack to gup_test.c whenever I
   wanted to try out changes to dump_page().  Then Matthew Wilcox asked me
   what I meant when I said "I used my dump_page() unit test", and I
   realized that it might be nice to check in a polished up version of
   that.

   Details about how it works and how to use it are in the commit
   description for patch #6 ("selftests/vm: gup_test: introduce the
   dump_pages() sub-test").

2) Fixes a limitation of hmm-tests: these tests are incredibly useful,
   but only if people actually build and run them.  And it turns out that
   libhugetlbfs is a little too effective at throwing a wrench in the
   works, there.  So I've added a little configuration check that removes
   just two of the 21 hmm-tests, if libhugetlbfs is not available.

   Further details in the commit description of patch #8
   ("selftests/vm: hmm-tests: remove the libhugetlbfs dependency").

Other smaller things that this series does:

a) Remove code duplication by creating gup_test.h.

b) Clear up the sub-test organization, and their invocation within
   run_vmtests.sh.

c) Other minor assorted improvements.

[1] v2 is here:
https://lore.kernel.org/linux-doc/20200929212747.251804-1-jhubbard@nvidia.com/

[2] https://lore.kernel.org/r/CAHk-=wgh-TMPHLY3jueHX7Y2fWh3D+nMBqVS__AZm6-oorquWA@mail.gmail.com

This patch (of 9):

Rename nearly every "gup_benchmark" reference and file name to "gup_test".
The one exception is for the actual gup benchmark test itself.

The current code already does a *little* bit more than benchmarking, and
definitely covers more than get_user_pages_fast().  More importantly,
however, subsequent patches are about to add some functionality that is
non-benchmark related.

Closely related changes:

* Kconfig: in addition to renaming the options from GUP_BENCHMARK to
  GUP_TEST, update the help text to reflect that it's no longer a
  benchmark-only test.

Link: https://lkml.kernel.org/r/20201026064021.3545418-1-jhubbard@nvidia.com
Link: https://lkml.kernel.org/r/20201026064021.3545418-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore      |   2 +-
 tools/testing/selftests/vm/Makefile        |   2 +-
 tools/testing/selftests/vm/config          |   2 +-
 tools/testing/selftests/vm/gup_benchmark.c | 143 -----------------------------
 tools/testing/selftests/vm/gup_test.c      | 143 +++++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests     |   8 +-
 6 files changed, 150 insertions(+), 150 deletions(-)
 delete mode 100644 tools/testing/selftests/vm/gup_benchmark.c
 create mode 100644 tools/testing/selftests/vm/gup_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 849e8226395a..2c8ddcf41c0e 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -15,7 +15,7 @@ userfaultfd
 mlock-intersect-test
 mlock-random-test
 virtual_address_range
-gup_benchmark
+gup_test
 va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 691893afc15d..43723df2c6c4 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -23,7 +23,7 @@ MAKEFLAGS += --no-builtin-rules
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
-TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 69dd0d1aa30b..60e82da0de85 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -3,4 +3,4 @@ CONFIG_USERFAULTFD=y
 CONFIG_TEST_VMALLOC=m
 CONFIG_DEVICE_PRIVATE=y
 CONFIG_TEST_HMM=m
-CONFIG_GUP_BENCHMARK=y
+CONFIG_GUP_TEST=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
deleted file mode 100644
index 1d4359341e44..000000000000
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <linux/types.h>
-
-#define MB (1UL << 20)
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
-
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_benchmark)
-
-/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_benchmark)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_benchmark)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_benchmark)
-
-/* Just the flags we need, copied from mm.h: */
-#define FOLL_WRITE	0x01	/* check pte is writable */
-
-struct gup_benchmark {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
-
-int main(int argc, char **argv)
-{
-	struct gup_benchmark gup;
-	unsigned long size = 128 * MB;
-	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
-	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
-	char *file = "/dev/zero";
-	char *p;
-
-	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
-		switch (opt) {
-		case 'a':
-			cmd = PIN_FAST_BENCHMARK;
-			break;
-		case 'b':
-			cmd = PIN_BENCHMARK;
-			break;
-		case 'L':
-			cmd = PIN_LONGTERM_BENCHMARK;
-			break;
-		case 'm':
-			size = atoi(optarg) * MB;
-			break;
-		case 'r':
-			repeats = atoi(optarg);
-			break;
-		case 'n':
-			nr_pages = atoi(optarg);
-			break;
-		case 't':
-			thp = 1;
-			break;
-		case 'T':
-			thp = 0;
-			break;
-		case 'U':
-			cmd = GUP_BENCHMARK;
-			break;
-		case 'u':
-			cmd = GUP_FAST_BENCHMARK;
-			break;
-		case 'w':
-			write = 1;
-			break;
-		case 'f':
-			file = optarg;
-			break;
-		case 'S':
-			flags &= ~MAP_PRIVATE;
-			flags |= MAP_SHARED;
-			break;
-		case 'H':
-			flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
-			break;
-		default:
-			return -1;
-		}
-	}
-
-	filed = open(file, O_RDWR|O_CREAT);
-	if (filed < 0) {
-		perror("open");
-		exit(filed);
-	}
-
-	gup.nr_pages_per_call = nr_pages;
-	if (write)
-		gup.flags |= FOLL_WRITE;
-
-	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
-	if (fd == -1) {
-		perror("open");
-		exit(1);
-	}
-
-	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
-	if (p == MAP_FAILED) {
-		perror("mmap");
-		exit(1);
-	}
-	gup.addr = (unsigned long)p;
-
-	if (thp == 1)
-		madvise(p, size, MADV_HUGEPAGE);
-	else if (thp == 0)
-		madvise(p, size, MADV_NOHUGEPAGE);
-
-	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
-		p[0] = 0;
-
-	for (i = 0; i < repeats; i++) {
-		gup.size = size;
-		if (ioctl(fd, cmd, &gup)) {
-			perror("ioctl");
-			exit(1);
-		}
-
-		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
-			gup.put_delta_usec);
-		if (gup.size != size)
-			printf(", truncated (size: %lld)", gup.size);
-		printf("\n");
-	}
-
-	return 0;
-}
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
new file mode 100644
index 000000000000..00b4731f535e
--- /dev/null
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -0,0 +1,143 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+#define MB (1UL << 20)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
+
+/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
+
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE	0x01	/* check pte is writable */
+
+struct gup_test {
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
+	__u64 addr;
+	__u64 size;
+	__u32 nr_pages_per_call;
+	__u32 flags;
+	__u64 expansion[10];	/* For future use */
+};
+
+int main(int argc, char **argv)
+{
+	struct gup_test gup;
+	unsigned long size = 128 * MB;
+	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	char *file = "/dev/zero";
+	char *p;
+
+	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
+		switch (opt) {
+		case 'a':
+			cmd = PIN_FAST_BENCHMARK;
+			break;
+		case 'b':
+			cmd = PIN_BENCHMARK;
+			break;
+		case 'L':
+			cmd = PIN_LONGTERM_BENCHMARK;
+			break;
+		case 'm':
+			size = atoi(optarg) * MB;
+			break;
+		case 'r':
+			repeats = atoi(optarg);
+			break;
+		case 'n':
+			nr_pages = atoi(optarg);
+			break;
+		case 't':
+			thp = 1;
+			break;
+		case 'T':
+			thp = 0;
+			break;
+		case 'U':
+			cmd = GUP_BENCHMARK;
+			break;
+		case 'u':
+			cmd = GUP_FAST_BENCHMARK;
+			break;
+		case 'w':
+			write = 1;
+			break;
+		case 'f':
+			file = optarg;
+			break;
+		case 'S':
+			flags &= ~MAP_PRIVATE;
+			flags |= MAP_SHARED;
+			break;
+		case 'H':
+			flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
+			break;
+		default:
+			return -1;
+		}
+	}
+
+	filed = open(file, O_RDWR|O_CREAT);
+	if (filed < 0) {
+		perror("open");
+		exit(filed);
+	}
+
+	gup.nr_pages_per_call = nr_pages;
+	if (write)
+		gup.flags |= FOLL_WRITE;
+
+	fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	gup.addr = (unsigned long)p;
+
+	if (thp == 1)
+		madvise(p, size, MADV_HUGEPAGE);
+	else if (thp == 0)
+		madvise(p, size, MADV_NOHUGEPAGE);
+
+	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+		p[0] = 0;
+
+	for (i = 0; i < repeats; i++) {
+		gup.size = size;
+		if (ioctl(fd, cmd, &gup)) {
+			perror("ioctl");
+			exit(1);
+		}
+
+		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+			gup.put_delta_usec);
+		if (gup.size != size)
+			printf(", truncated (size: %lld)", gup.size);
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index a3f4f30f0a2e..d1843d5f3c30 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -124,9 +124,9 @@ else
 fi
 
 echo "--------------------------------------------"
-echo "running 'gup_benchmark -U' (normal/slow gup)"
+echo "running 'gup_test -U' (normal/slow gup)"
 echo "--------------------------------------------"
-./gup_benchmark -U
+./gup_test -U
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -135,9 +135,9 @@ else
 fi
 
 echo "------------------------------------------"
-echo "running gup_benchmark -b (pin_user_pages)"
+echo "running gup_test -b (pin_user_pages)"
 echo "------------------------------------------"
-./gup_benchmark -b
+./gup_test -b
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3


From b9dcfdff8b4b223280015281b5050976c484c80a Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:08 -0800
Subject: selftests/vm: use a common gup_test.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid the need to copy-paste the gup_test ioctl commands and the struct
gup_test definition, between the kernel and the user space application, by
providing a new header file for these.  This allows easier and safer
adding of new ioctl calls, as well as reducing the overall line count.

Details: The header file has to be able to compile independently, because
of the arguably unfortunate way that the Makefile is written: the Makefile
tries to build all of its prerequisites, when really it should be only
building the .c files, and leaving the other prerequisites (LOCAL_HDRS) as
pure dependencies.

That Makefile limitation is probably not worth fixing, but it explains why
one of the includes had to be moved into the new header file.

Also: simplify the ioctl struct (struct gup_test), by deleting the unused
__expansion[10] field.  This sort of thing is what you might see in a
stable ABI, but this low-level, kernel-developer-oriented selftests/vm
system is very much not subject to ABI stability.  So "expansion" and
"reserved" fields are unnecessary here.

Link: https://lkml.kernel.org/r/20201026064021.3545418-3-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile   |  2 ++
 tools/testing/selftests/vm/gup_test.c | 22 +---------------------
 2 files changed, 3 insertions(+), 21 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 43723df2c6c4..101c0315bc92 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -134,3 +134,5 @@ endif
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread
 
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
+
+$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 00b4731f535e..03f7c4f1beaf 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -2,39 +2,19 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-
-#include <linux/types.h>
+#include "../../../../mm/gup_test.h"
 
 #define MB (1UL << 20)
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
-
-/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
-
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
-struct gup_test {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
-
 int main(int argc, char **argv)
 {
 	struct gup_test gup;
-- 
cgit v1.2.3


From c2aa8afc36fa8669ac165ace1f4d7173f21f367f Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:11 -0800
Subject: selftests/vm: rename run_vmtests --> run_vmtests.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename to *.sh, in order to match the conventions of all of the other
items in selftest/vm.

The only reason not to use a .sh suffix a shell script like this, might be
to make it look more like a normal program, but that's not an issue here.

Link: https://lkml.kernel.org/r/20201026064021.3545418-4-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 101c0315bc92..74ac457c96bb 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -73,7 +73,7 @@ TEST_GEN_FILES += virtual_address_range
 TEST_GEN_FILES += write_to_hugetlbfs
 endif
 
-TEST_PROGS := run_vmtests
+TEST_PROGS := run_vmtests.sh
 
 TEST_FILES := test_vmalloc.sh
 
-- 
cgit v1.2.3


From f545605cc08e1f1820b4c8748689e7c6d4365d99 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:14 -0800
Subject: selftests/vm: minor cleanup: Makefile and gup_test.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few cleanups that don't deserve separate patches, but that also should
not clutter up other functional changes:

1. Remove an unnecessary #include <prctl.h>

2. Restore the sorted order of TEST_GEN_FILES.

3. Add -lpthread to the common LDLIBS, as it is harmless and several
   tests use it. This gets rid of one special rule already.

Link: https://lkml.kernel.org/r/20201026064021.3545418-5-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile   | 10 ++++------
 tools/testing/selftests/vm/gup_test.c |  1 -
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 74ac457c96bb..e640d8145cbd 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -21,14 +21,15 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 MAKEFLAGS += --no-builtin-rules
 
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
-LDLIBS = -lrt
+LDLIBS = -lrt -lpthread
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
-TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += khugepaged
 TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
@@ -37,7 +38,6 @@ TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += khugepaged
 
 ifeq ($(ARCH),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
@@ -80,7 +80,7 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs
 
 ifeq ($(ARCH),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
@@ -131,8 +131,6 @@ warn_32bit_failure:
 endif
 endif
 
-$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
-
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 $(OUTPUT)/gup_test: ../../../../mm/gup_test.h
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 03f7c4f1beaf..1a54771ad97e 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -4,7 +4,6 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
-#include <sys/prctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include "../../../../mm/gup_test.h"
-- 
cgit v1.2.3


From a9bed1e1c2a9bb36cdd29af0ef48044d1b9e8c2a Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:17 -0800
Subject: selftests/vm: only some gup_test items are really benchmarks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Therefore, some minor cleanup and improvements are in order:

1. Rename the other items appropriately.

2. Stop reporting timing information on the non-benchmark items. It's
   still being recorded and is available, but there's no point in
   cluttering up the report with data that no one reasonably needs to
   check.

3. Don't do iterations, for non-benchmark items.

4. Print out a shorter, more appropriate report for the non-benchmark
   tests.

5. Add the command that was run, to the report. This really helps, as
   there are quite a lot of options now.

6. Use a larger integer type for cmd, now that it's being compared
   Otherwise it doesn't work, because in this case cmd is about 3 billion,
   which is the perfect size for problems with signed vs unsigned int.

Link: https://lkml.kernel.org/r/20201026064021.3545418-6-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c | 47 +++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1a54771ad97e..f9163e1bb57a 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -14,12 +14,30 @@
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
+static char *cmd_to_str(unsigned long cmd)
+{
+	switch (cmd) {
+	case GUP_FAST_BENCHMARK:
+		return "GUP_FAST_BENCHMARK";
+	case PIN_FAST_BENCHMARK:
+		return "PIN_FAST_BENCHMARK";
+	case PIN_LONGTERM_BENCHMARK:
+		return "PIN_LONGTERM_BENCHMARK";
+	case GUP_BASIC_TEST:
+		return "GUP_BASIC_TEST";
+	case PIN_BASIC_TEST:
+		return "PIN_BASIC_TEST";
+	}
+	return "Unknown command";
+}
+
 int main(int argc, char **argv)
 {
 	struct gup_test gup;
 	unsigned long size = 128 * MB;
 	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
-	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	unsigned long cmd = GUP_FAST_BENCHMARK;
+	int flags = MAP_PRIVATE;
 	char *file = "/dev/zero";
 	char *p;
 
@@ -29,7 +47,7 @@ int main(int argc, char **argv)
 			cmd = PIN_FAST_BENCHMARK;
 			break;
 		case 'b':
-			cmd = PIN_BENCHMARK;
+			cmd = PIN_BASIC_TEST;
 			break;
 		case 'L':
 			cmd = PIN_LONGTERM_BENCHMARK;
@@ -50,7 +68,7 @@ int main(int argc, char **argv)
 			thp = 0;
 			break;
 		case 'U':
-			cmd = GUP_BENCHMARK;
+			cmd = GUP_BASIC_TEST;
 			break;
 		case 'u':
 			cmd = GUP_FAST_BENCHMARK;
@@ -104,18 +122,31 @@ int main(int argc, char **argv)
 	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
 		p[0] = 0;
 
-	for (i = 0; i < repeats; i++) {
+	/* Only report timing information on the *_BENCHMARK commands: */
+	if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+	     (cmd == PIN_LONGTERM_BENCHMARK)) {
+		for (i = 0; i < repeats; i++) {
+			gup.size = size;
+			if (ioctl(fd, cmd, &gup))
+				perror("ioctl"), exit(1);
+
+			printf("%s: Time: get:%lld put:%lld us",
+			       cmd_to_str(cmd), gup.get_delta_usec,
+			       gup.put_delta_usec);
+			if (gup.size != size)
+				printf(", truncated (size: %lld)", gup.size);
+			printf("\n");
+		}
+	} else {
 		gup.size = size;
 		if (ioctl(fd, cmd, &gup)) {
 			perror("ioctl");
 			exit(1);
 		}
 
-		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
-			gup.put_delta_usec);
+		printf("%s: done\n", cmd_to_str(cmd));
 		if (gup.size != size)
-			printf(", truncated (size: %lld)", gup.size);
-		printf("\n");
+			printf("Truncated (size: %lld)\n", gup.size);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From f4f9bda418ab8b4dbc5372e9e2a28162f7777154 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:21 -0800
Subject: selftests/vm: gup_test: introduce the dump_pages() sub-test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For quite a while, I was doing a quick hack to gup_test.c (previously,
gup_benchmark.c) whenever I wanted to try out my changes to dump_page().
This makes that hack unnecessary, and instead allows anyone to easily get
the same coverage from a user space program.  That saves a lot of time
because you don't have to change the kernel, in order to test different
pages and options.

The new sub-test takes advantage of the existing gup_test infrastructure,
which already provides a simple user space program, some allocated user
space pages, an ioctl call, pinning of those pages (via either
get_user_pages or pin_user_pages) and a corresponding kernel-side test
invocation.  There's not much more required, mainly just a couple of
inputs from the user.

In fact, the new test re-uses the existing command line options in order
to get various helpful combinations (THP or normal, _fast or slow gup, gup
vs.  pup, and more).

New command line options are: which pages to dump, and what type of
"get/pin" to use.

In order to figure out which pages to dump, the logic is:

* If the user doesn't specify anything, the page 0 (the first page in
  the address range that the program sets up for testing) is dumped.

* Or, the user can type up to 8 page indices anywhere on the command
  line.  If you type more than 8, then it uses the first 8 and ignores the
  remaining items.

For example:

    ./gup_test -ct -F 1 0 19 0x1000

Meaning:
    -c:          dump pages sub-test
    -t:          use THP pages
    -F 1:        use pin_user_pages() instead of get_user_pages()
    0 19 0x1000: dump pages 0, 19, and 4096

Link: https://lkml.kernel.org/r/20201026064021.3545418-7-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c | 45 +++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index f9163e1bb57a..6c6336dd3b7f 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -27,13 +27,15 @@ static char *cmd_to_str(unsigned long cmd)
 		return "GUP_BASIC_TEST";
 	case PIN_BASIC_TEST:
 		return "PIN_BASIC_TEST";
+	case DUMP_USER_PAGES_TEST:
+		return "DUMP_USER_PAGES_TEST";
 	}
 	return "Unknown command";
 }
 
 int main(int argc, char **argv)
 {
-	struct gup_test gup;
+	struct gup_test gup = { 0 };
 	unsigned long size = 128 * MB;
 	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
 	unsigned long cmd = GUP_FAST_BENCHMARK;
@@ -41,7 +43,7 @@ int main(int argc, char **argv)
 	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) {
 		switch (opt) {
 		case 'a':
 			cmd = PIN_FAST_BENCHMARK;
@@ -52,6 +54,21 @@ int main(int argc, char **argv)
 		case 'L':
 			cmd = PIN_LONGTERM_BENCHMARK;
 			break;
+		case 'c':
+			cmd = DUMP_USER_PAGES_TEST;
+			/*
+			 * Dump page 0 (index 1). May be overridden later, by
+			 * user's non-option arguments.
+			 *
+			 * .which_pages is zero-based, so that zero can mean "do
+			 * nothing".
+			 */
+			gup.which_pages[0] = 1;
+			break;
+		case 'F':
+			/* strtol, so you can pass flags in hex form */
+			gup.flags = strtol(optarg, 0, 0);
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -91,6 +108,30 @@ int main(int argc, char **argv)
 		}
 	}
 
+	if (optind < argc) {
+		int extra_arg_count = 0;
+		/*
+		 * For example:
+		 *
+		 *   ./gup_test -c 0 1 0x1001
+		 *
+		 * ...to dump pages 0, 1, and 4097
+		 */
+
+		while ((optind < argc) &&
+		       (extra_arg_count < GUP_TEST_MAX_PAGES_TO_DUMP)) {
+			/*
+			 * Do the 1-based indexing here, so that the user can
+			 * use normal 0-based indexing on the command line.
+			 */
+			long page_index = strtol(argv[optind], 0, 0) + 1;
+
+			gup.which_pages[extra_arg_count] = page_index;
+			extra_arg_count++;
+			optind++;
+		}
+	}
+
 	filed = open(file, O_RDWR|O_CREAT);
 	if (filed < 0) {
 		perror("open");
-- 
cgit v1.2.3


From d943fe81e0bf49dda1369e87d49c5276a02698df Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:24 -0800
Subject: selftests/vm: run_vmtests.sh: update and clean up gup_test invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run benchmarks on the _fast variants of gup and pup, as originally
intended.

Run the new gup_test sub-test: dump pages.  In addition to exercising the
dump_page() call, it also demonstrates the various options you can use to
specify which pages to dump, and how.

Link: https://lkml.kernel.org/r/20201026064021.3545418-8-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index d1843d5f3c30..4ac84b350d9f 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -123,10 +123,10 @@ else
 	echo "[PASS]"
 fi
 
-echo "--------------------------------------------"
-echo "running 'gup_test -U' (normal/slow gup)"
-echo "--------------------------------------------"
-./gup_test -U
+echo "------------------------------------------------------"
+echo "running: gup_test -u # get_user_pages_fast() benchmark"
+echo "------------------------------------------------------"
+./gup_test -u
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -134,10 +134,22 @@ else
 	echo "[PASS]"
 fi
 
-echo "------------------------------------------"
-echo "running gup_test -b (pin_user_pages)"
-echo "------------------------------------------"
-./gup_test -b
+echo "------------------------------------------------------"
+echo "running: gup_test -a # pin_user_pages_fast() benchmark"
+echo "------------------------------------------------------"
+./gup_test -a
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "------------------------------------------------------------"
+echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
+echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
+echo "------------------------------------------------------------"
+./gup_test -ct -F 0x1 0 19 0x1000
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3


From f3a45709d2bb1b6cbab2899a6c8e75dfb8e4aad7 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:28 -0800
Subject: selftests/vm: hmm-tests: remove the libhugetlbfs dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HMM selftests are incredibly useful, but they are only effective if people
actually build and run them.  All the other tests in selftests/vm can be
built with very standard, always-available libraries: libpthread, librt.
The hmm-tests.c program, on the other hand, requires something that is
(much) less readily available: libhugetlbfs.  And so the build will
typically fail for many developers.

A simple attempt to install libhugetlbfs will also run into complications
on some common distros these days: Fedora and Arch Linux (yes, Arch AUR
has it, but that's fragile, as always with AUR).  The library is not
maintained actively enough at the moment, for distros to deal with it.  I
had to build it from source, for Fedora, and that didn't go too smoothly
either.

It turns out that, out of 21 tests in hmm-tests.c, only 2 actually require
functionality from libhugetlbfs.  Therefore, if libhugetlbfs is missing,
simply ifdef those two tests out and allow the developer to at least have
the other 19 tests, if they don't want to pause to work through the above
issues.  Also issue a warning, so that it's clear that there is an
imperfection in the build.

In order to do that, a tiny shell script (check_config.sh) runs a quick
compile (not link, that's too prone to false failures with library paths),
and basically, if the compiler doesn't find hugetlbfs.h in its standard
locations, then the script concludes that libhugetlbfs is not available.
The output is in two files, one for inclusion in hmm-test.c
(local_config.h), and one for inclusion in the Makefile (local_config.mk).

Link: https://lkml.kernel.org/r/20201026064021.3545418-9-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore      |  1 +
 tools/testing/selftests/vm/Makefile        | 24 +++++++++++++++++++++--
 tools/testing/selftests/vm/check_config.sh | 31 ++++++++++++++++++++++++++++++
 tools/testing/selftests/vm/hmm-tests.c     | 10 +++++++++-
 4 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/vm/check_config.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2c8ddcf41c0e..e90d28bcd518 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -20,3 +20,4 @@ va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
 hmm-tests
+local_config.*
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index e640d8145cbd..bdd3fe7fd086 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for vm selftests
+
+include local_config.mk
+
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 
@@ -80,8 +83,6 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs
-
 ifeq ($(ARCH),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
@@ -134,3 +135,22 @@ endif
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 $(OUTPUT)/gup_test: ../../../../mm/gup_test.h
+
+$(OUTPUT)/hmm-tests: local_config.h
+
+# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
+
+local_config.mk local_config.h: check_config.sh
+	/bin/sh ./check_config.sh $(CC)
+
+EXTRA_CLEAN += local_config.mk local_config.h
+
+ifeq ($(HMM_EXTRA_LIBS),)
+all: warn_missing_hugelibs
+
+warn_missing_hugelibs:
+	@echo ; \
+	echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \
+	echo
+endif
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
new file mode 100644
index 000000000000..079c8a40b85d
--- /dev/null
+++ b/tools/testing/selftests/vm/check_config.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Probe for libraries and create header files to record the results. Both C
+# header files and Makefile include fragments are created.
+
+OUTPUT_H_FILE=local_config.h
+OUTPUT_MKFILE=local_config.mk
+
+# libhugetlbfs
+tmpname=$(mktemp)
+tmpfile_c=${tmpname}.c
+tmpfile_o=${tmpname}.o
+
+echo "#include <sys/types.h>"        > $tmpfile_c
+echo "#include <hugetlbfs.h>"       >> $tmpfile_c
+echo "int func(void) { return 0; }" >> $tmpfile_c
+
+CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
+$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
+
+if [ -f $tmpfile_o ]; then
+    echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE
+    echo "HMM_EXTRA_LIBS = -lhugetlbfs"             > $OUTPUT_MKFILE
+else
+    echo "// No libhugetlbfs support found"      > $OUTPUT_H_FILE
+    echo "# No libhugetlbfs support found, so:"  > $OUTPUT_MKFILE
+    echo "HMM_EXTRA_LIBS = "                    >> $OUTPUT_MKFILE
+fi
+
+rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index c9404ef9698e..5d1ac691b9f4 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -21,12 +21,16 @@
 #include <strings.h>
 #include <time.h>
 #include <pthread.h>
-#include <hugetlbfs.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 
+#include "./local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
+#include <hugetlbfs.h>
+#endif
+
 /*
  * This is a private UAPI to the kernel test module so it isn't exported
  * in the usual include/uapi/... directory.
@@ -662,6 +666,7 @@ TEST_F(hmm, anon_write_huge)
 	hmm_buffer_free(buffer);
 }
 
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
 /*
  * Write huge TLBFS page.
  */
@@ -720,6 +725,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 	buffer->ptr = NULL;
 	hmm_buffer_free(buffer);
 }
+#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
 
 /*
  * Read mmap'ed file memory.
@@ -1336,6 +1342,7 @@ TEST_F(hmm2, snapshot)
 	hmm_buffer_free(buffer);
 }
 
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
 /*
  * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
  * should be mapped by a large page table entry.
@@ -1411,6 +1418,7 @@ TEST_F(hmm, compound)
 	buffer->ptr = NULL;
 	hmm_buffer_free(buffer);
 }
+#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
 
 /*
  * Test two devices reading the same memory (double mapped).
-- 
cgit v1.2.3


From a26c4c62990a3ad5061f72e68f2394a01480265d Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:31 -0800
Subject: selftests/vm: 2x speedup for run_vmtests.sh

Each invocation of userfaultfd for "anon" and "shmem" was taking about
6.5 sec to run, contributing to an overall run time of about 22 sec for
run_vmtests.sh.

Reduce the size and bounce input values to the userfaultfd invocation
within run_vmtests.sh, enough to get each invocation down to about 1.0
sec. This should still provide a reasonable smoke test, while staying
within a nominal time budget of around 1 second or so per test. And this
brings the overall running time of run_vmtests.sh down to 11 second.

Link: https://lkml.kernel.org/r/20201026064021.3545418-10-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 4ac84b350d9f..9e8837768ee2 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -160,7 +160,7 @@ fi
 echo "-------------------"
 echo "running userfaultfd"
 echo "-------------------"
-./userfaultfd anon 128 32
+./userfaultfd anon 20 16
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -185,7 +185,7 @@ rm -f $mnt/ufd_test_file
 echo "-------------------------"
 echo "running userfaultfd_shmem"
 echo "-------------------------"
-./userfaultfd shmem 128 32
+./userfaultfd shmem 20 16
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3


From 7df666253f2610284f653bce0e2e50b4923c84aa Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Mon, 14 Dec 2020 19:07:25 -0800
Subject: kselftests: vm: add mremap tests

Patch series "Speed up mremap on large regions", v4.

mremap time can be optimized by moving entries at the PMD/PUD level if the
source and destination addresses are PMD/PUD-aligned and PMD/PUD-sized.
Enable moving at the PMD and PUD levels on arm64 and x86.  Other
architectures where this type of move is supported and known to be safe
can also opt-in to these optimizations by enabling HAVE_MOVE_PMD and
HAVE_MOVE_PUD.

Observed Performance Improvements for remapping a PUD-aligned 1GB-sized
region on x86 and arm64:

    - HAVE_MOVE_PMD is already enabled on x86 : N/A
    - Enabling HAVE_MOVE_PUD on x86   : ~13x speed up

    - Enabling HAVE_MOVE_PMD on arm64 : ~ 8x speed up
    - Enabling HAVE_MOVE_PUD on arm64 : ~19x speed up

          Altogether, HAVE_MOVE_PMD and HAVE_MOVE_PUD
          give a total of ~150x speed up on arm64.

This patch (of 4):

Test mremap on regions of various sizes and alignments and validate data
after remapping.  Also provide total time for remapping the region which
is useful for performance comparison of the mremap optimizations that move
pages at the PMD/PUD levels if HAVE_MOVE_PMD and/or HAVE_MOVE_PUD are
enabled.

Link: https://lkml.kernel.org/r/20201014005320.2233162-1-kaleshsingh@google.com
Link: https://lkml.kernel.org/r/20201014005320.2233162-2-kaleshsingh@google.com
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Hassan Naveed <hnaveed@wavecomp.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Jia He <justin.he@arm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: SeongJae Park <sjpark@amazon.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore    |   1 +
 tools/testing/selftests/vm/Makefile      |   1 +
 tools/testing/selftests/vm/mremap_test.c | 344 +++++++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests   |  11 +
 4 files changed, 357 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mremap_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index e90d28bcd518..9a35c3f6a557 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -8,6 +8,7 @@ thuge-gen
 compaction_test
 mlock2-tests
 mremap_dontunmap
+mremap_test
 on-fault-limit
 transhuge-stress
 protection_keys
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index bdd3fe7fd086..9a25307f6115 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -37,6 +37,7 @@ TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
 TEST_GEN_FILES += mremap_dontunmap
+TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
new file mode 100644
index 000000000000..9c391d016922
--- /dev/null
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Google LLC
+ */
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+
+#include "../kselftest.h"
+
+#define EXPECT_SUCCESS 0
+#define EXPECT_FAILURE 1
+#define NON_OVERLAPPING 0
+#define OVERLAPPING 1
+#define NS_PER_SEC 1000000000ULL
+#define VALIDATION_DEFAULT_THRESHOLD 4	/* 4MB */
+#define VALIDATION_NO_THRESHOLD 0	/* Verify the entire region */
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
+struct config {
+	unsigned long long src_alignment;
+	unsigned long long dest_alignment;
+	unsigned long long region_size;
+	int overlapping;
+};
+
+struct test {
+	const char *name;
+	struct config config;
+	int expect_failure;
+};
+
+enum {
+	_1KB = 1ULL << 10,	/* 1KB -> not page aligned */
+	_4KB = 4ULL << 10,
+	_8KB = 8ULL << 10,
+	_1MB = 1ULL << 20,
+	_2MB = 2ULL << 20,
+	_4MB = 4ULL << 20,
+	_1GB = 1ULL << 30,
+	_2GB = 2ULL << 30,
+	PTE = _4KB,
+	PMD = _2MB,
+	PUD = _1GB,
+};
+
+#define MAKE_TEST(source_align, destination_align, size,	\
+		  overlaps, should_fail, test_name)		\
+{								\
+	.name = test_name,					\
+	.config = {						\
+		.src_alignment = source_align,			\
+		.dest_alignment = destination_align,		\
+		.region_size = size,				\
+		.overlapping = overlaps,			\
+	},							\
+	.expect_failure = should_fail				\
+}
+
+/*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+	unsigned long long addr = 0ULL;
+	void *src_addr = NULL;
+retry:
+	addr += c.src_alignment;
+	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+			MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (src_addr == MAP_FAILED) {
+		if (errno == EPERM)
+			goto retry;
+		goto error;
+	}
+	/*
+	 * Check that the address is aligned to the specified alignment.
+	 * Addresses which have alignments that are multiples of that
+	 * specified are not considered valid. For instance, 1GB address is
+	 * 2MB-aligned, however it will not be considered valid for a
+	 * requested alignment of 2MB. This is done to reduce coincidental
+	 * alignment in the tests.
+	 */
+	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+			!((unsigned long long) src_addr & c.src_alignment))
+		goto retry;
+
+	if (!src_addr)
+		goto error;
+
+	return src_addr;
+error:
+	ksft_print_msg("Failed to map source region: %s\n",
+			strerror(errno));
+	return NULL;
+}
+
+/* Returns the time taken for the remap on success else returns -1. */
+static long long remap_region(struct config c, unsigned int threshold_mb,
+			      char pattern_seed)
+{
+	void *addr, *src_addr, *dest_addr;
+	unsigned long long i;
+	struct timespec t_start = {0, 0}, t_end = {0, 0};
+	long long  start_ns, end_ns, align_mask, ret, offset;
+	unsigned long long threshold;
+
+	if (threshold_mb == VALIDATION_NO_THRESHOLD)
+		threshold = c.region_size;
+	else
+		threshold = MIN(threshold_mb * _1MB, c.region_size);
+
+	src_addr = get_source_mapping(c);
+	if (!src_addr) {
+		ret = -1;
+		goto out;
+	}
+
+	/* Set byte pattern */
+	srand(pattern_seed);
+	for (i = 0; i < threshold; i++)
+		memset((char *) src_addr + i, (char) rand(), 1);
+
+	/* Mask to zero out lower bits of address for alignment */
+	align_mask = ~(c.dest_alignment - 1);
+	/* Offset of destination address from the end of the source region */
+	offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
+	addr = (void *) (((unsigned long long) src_addr + c.region_size
+			  + offset) & align_mask);
+
+	/* See comment in get_source_mapping() */
+	if (!((unsigned long long) addr & c.dest_alignment))
+		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+
+	clock_gettime(CLOCK_MONOTONIC, &t_start);
+	dest_addr = mremap(src_addr, c.region_size, c.region_size,
+			MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+	clock_gettime(CLOCK_MONOTONIC, &t_end);
+
+	if (dest_addr == MAP_FAILED) {
+		ksft_print_msg("mremap failed: %s\n", strerror(errno));
+		ret = -1;
+		goto clean_up_src;
+	}
+
+	/* Verify byte pattern after remapping */
+	srand(pattern_seed);
+	for (i = 0; i < threshold; i++) {
+		char c = (char) rand();
+
+		if (((char *) dest_addr)[i] != c) {
+			ksft_print_msg("Data after remap doesn't match at offset %d\n",
+				       i);
+			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+					((char *) dest_addr)[i] & 0xff);
+			ret = -1;
+			goto clean_up_dest;
+		}
+	}
+
+	start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
+	end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
+	ret = end_ns - start_ns;
+
+/*
+ * Since the destination address is specified using MREMAP_FIXED, subsequent
+ * mremap will unmap any previous mapping at the address range specified by
+ * dest_addr and region_size. This significantly affects the remap time of
+ * subsequent tests. So we clean up mappings after each test.
+ */
+clean_up_dest:
+	munmap(dest_addr, c.region_size);
+clean_up_src:
+	munmap(src_addr, c.region_size);
+out:
+	return ret;
+}
+
+static void run_mremap_test_case(struct test test_case, int *failures,
+				 unsigned int threshold_mb,
+				 unsigned int pattern_seed)
+{
+	long long remap_time = remap_region(test_case.config, threshold_mb,
+					    pattern_seed);
+
+	if (remap_time < 0) {
+		if (test_case.expect_failure)
+			ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+					      test_case.name);
+		else {
+			ksft_test_result_fail("%s\n", test_case.name);
+			*failures += 1;
+		}
+	} else {
+		/*
+		 * Comparing mremap time is only applicable if entire region
+		 * was faulted in.
+		 */
+		if (threshold_mb == VALIDATION_NO_THRESHOLD ||
+		    test_case.config.region_size <= threshold_mb * _1MB)
+			ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
+					      test_case.name, remap_time);
+		else
+			ksft_test_result_pass("%s\n", test_case.name);
+	}
+}
+
+static void usage(const char *cmd)
+{
+	fprintf(stderr,
+		"Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n"
+		"-t\t only validate threshold_mb of the remapped region\n"
+		"  \t if 0 is supplied no threshold is used; all tests\n"
+		"  \t are run and remapped regions validated fully.\n"
+		"  \t The default threshold used is 4MB.\n"
+		"-p\t provide a seed to generate the random pattern for\n"
+		"  \t validating the remapped region.\n", cmd);
+}
+
+static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
+		      unsigned int *pattern_seed)
+{
+	const char *optstr = "t:p:";
+	int opt;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 't':
+			*threshold_mb = atoi(optarg);
+			break;
+		case 'p':
+			*pattern_seed = atoi(optarg);
+			break;
+		default:
+			usage(argv[0]);
+			return -1;
+		}
+	}
+
+	if (optind < argc) {
+		usage(argv[0]);
+		return -1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int failures = 0;
+	int i, run_perf_tests;
+	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+	unsigned int pattern_seed;
+	time_t t;
+
+	pattern_seed = (unsigned int) time(&t);
+
+	if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
+		exit(EXIT_FAILURE);
+
+	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
+		       threshold_mb, pattern_seed);
+
+	struct test test_cases[] = {
+		/* Expected mremap failures */
+		MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Source and Destination Regions Overlapping"),
+		MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Destination Address Misaligned (1KB-aligned)"),
+		MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Source Address Misaligned (1KB-aligned)"),
+
+		/* Src addr PTE aligned */
+		MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
+
+		/* Src addr 1MB aligned */
+		MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
+		MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
+
+		/* Src addr PMD aligned */
+		MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
+		MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
+
+		/* Src addr PUD aligned */
+		MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
+		MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
+		MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
+	};
+
+	struct test perf_test_cases[] = {
+		/*
+		 * mremap 1GB region - Page table level aligned time
+		 * comparison.
+		 */
+		MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
+		MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
+	};
+
+	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
+				(threshold_mb * _1MB >= _1GB);
+
+	ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
+		      ARRAY_SIZE(perf_test_cases) : 0));
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
+				     pattern_seed);
+
+	if (run_perf_tests) {
+		ksft_print_msg("\n%s\n",
+		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
+		for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
+			run_mremap_test_case(perf_test_cases[i], &failures,
+					     threshold_mb, pattern_seed);
+	}
+
+	if (failures > 0)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 9e8837768ee2..e953f3cd9664 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -253,6 +253,17 @@ else
 	echo "[PASS]"
 fi
 
+echo "-------------------"
+echo "running mremap_test"
+echo "-------------------"
+./mremap_test
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "-----------------"
 echo "running thuge-gen"
 echo "-----------------"
-- 
cgit v1.2.3


From f289041ed4cf9a3f6e8a32068fef9ffb2acc5662 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 14 Dec 2020 19:13:45 -0800
Subject: mm, page_poison: remove CONFIG_PAGE_POISONING_ZERO

CONFIG_PAGE_POISONING_ZERO uses the zero pattern instead of 0xAA.  It was
introduced by commit 1414c7f4f7d7 ("mm/page_poisoning.c: allow for zero
poisoning"), noting that using zeroes retains the benefit of sanitizing
content of freed pages, with the benefit of not having to zero them again
on alloc, and the downside of making some forms of corruption (stray
writes of NULLs) harder to detect than with the 0xAA pattern.  Together
with CONFIG_PAGE_POISONING_NO_SANITY it made possible to sanitize the
contents on free without checking it back on alloc.

These days we have the init_on_free() option to achieve sanitization with
zeroes and to save clearing on alloc (and without checking on alloc).
Arguably if someone does choose to check the poison for corruption on
alloc, the savings of not clearing the page are secondary, and it makes
sense to always use the 0xAA poison pattern.  Thus, remove the
CONFIG_PAGE_POISONING_ZERO option for being redundant.

Link: https://lkml.kernel.org/r/20201113104033.22907-6-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Mateusz Nosek <mateusznosek0@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/include/linux/poison.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index d29725769107..2e6338ac5eed 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -35,12 +35,8 @@
  */
 #define TIMER_ENTRY_STATIC	((void *) 0x300 + POISON_POINTER_DELTA)
 
-/********** mm/debug-pagealloc.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
+/********** mm/page_poison.c **********/
 #define PAGE_POISON 0xaa
-#endif
 
 /********** mm/page_alloc.c ************/
 
-- 
cgit v1.2.3


From 77f962e7ae24e5fa7b257b8242c62e716119a312 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Mon, 14 Dec 2020 19:13:58 -0800
Subject: userfaultfd: selftests: make __{s,u}64 format specifiers portable

On certain platforms (powerpcle is the one on which I ran into this),
"%Ld" and "%Lu" are unsuitable for printing __s64 and __u64, respectively,
resulting in build warnings.  Cast to {u,}int64_t, and use the PRI{d,u}64
macros defined in inttypes.h to print them.  This ought to be portable to
all platforms.

Splitting this off into a separate macro lets us remove some lines, and
get rid of some (I would argue) stylistically odd cases where we joined
printf() and exit() into a single statement with a ,.

Finally, this also fixes a "missing braces around initializer" warning
when we initialize prms in wp_range().

[axelrasmussen@google.com: v2]
  Link: https://lkml.kernel.org/r/20201203180244.1811601-1-axelrasmussen@google.com

Link: https://lkml.kernel.org/r/20201202211542.1121189-1-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Joe Perches <joe@perches.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Alan Gilbert <dgilbert@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 81 ++++++++++++++------------------
 1 file changed, 35 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index c4425597769a..a7059a6bc215 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -55,6 +55,8 @@
 #include <setjmp.h>
 #include <stdbool.h>
 #include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
 
 #include "../kselftest.h"
 
@@ -135,6 +137,13 @@ static void usage(void)
 	exit(1);
 }
 
+#define uffd_error(code, fmt, ...)                                             \
+	do {                                                                   \
+		fprintf(stderr, fmt, ##__VA_ARGS__);                           \
+		fprintf(stderr, ": %" PRId64 "\n", (int64_t)(code));           \
+		exit(1);                                                       \
+	} while (0)
+
 static void uffd_stats_reset(struct uffd_stats *uffd_stats,
 			     unsigned long n_cpus)
 {
@@ -338,7 +347,7 @@ static int my_bcmp(char *str1, char *str2, size_t n)
 
 static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 {
-	struct uffdio_writeprotect prms = { 0 };
+	struct uffdio_writeprotect prms;
 
 	/* Write protection page faults */
 	prms.range.start = start;
@@ -347,7 +356,8 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 	prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
 
 	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
-		fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+		fprintf(stderr, "clear WP failed for address 0x%" PRIx64 "\n",
+			(uint64_t)start);
 		exit(1);
 	}
 }
@@ -481,14 +491,11 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
 	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
 		if (uffdio_copy->copy != -EEXIST) {
-			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
-				uffdio_copy->copy);
-			exit(1);
+			uffd_error(uffdio_copy->copy,
+				   "UFFDIO_COPY retry error");
 		}
-	} else {
-		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
-			uffdio_copy->copy); exit(1);
-	}
+	} else
+		uffd_error(uffdio_copy->copy, "UFFDIO_COPY retry unexpected");
 }
 
 static int __copy_page(int ufd, unsigned long offset, bool retry)
@@ -509,14 +516,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 	uffdio_copy.copy = 0;
 	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy.copy != -EEXIST) {
-			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
-				uffdio_copy.copy);
-			exit(1);
-		}
+		if (uffdio_copy.copy != -EEXIST)
+			uffd_error(uffdio_copy.copy, "UFFDIO_COPY error");
 	} else if (uffdio_copy.copy != page_size) {
-		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
-			uffdio_copy.copy); exit(1);
+		uffd_error(uffdio_copy.copy, "UFFDIO_COPY unexpected copy");
 	} else {
 		if (test_uffdio_copy_eexist && retry) {
 			test_uffdio_copy_eexist = false;
@@ -795,7 +798,8 @@ static int userfaultfd_open(int features)
 		return 1;
 	}
 	if (uffdio_api.api != UFFD_API) {
-		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+		fprintf(stderr, "UFFDIO_API error: %" PRIu64 "\n",
+			(uint64_t)uffdio_api.api);
 		return 1;
 	}
 
@@ -957,13 +961,12 @@ static void retry_uffdio_zeropage(int ufd,
 				     offset);
 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
 		if (uffdio_zeropage->zeropage != -EEXIST) {
-			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
-				uffdio_zeropage->zeropage);
-			exit(1);
+			uffd_error(uffdio_zeropage->zeropage,
+				   "UFFDIO_ZEROPAGE retry error");
 		}
 	} else {
-		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
-			uffdio_zeropage->zeropage); exit(1);
+		uffd_error(uffdio_zeropage->zeropage,
+			   "UFFDIO_ZEROPAGE retry unexpected");
 	}
 }
 
@@ -972,6 +975,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	struct uffdio_zeropage uffdio_zeropage;
 	int ret;
 	unsigned long has_zeropage;
+	__s64 res;
 
 	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
 
@@ -983,29 +987,17 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	uffdio_zeropage.range.len = page_size;
 	uffdio_zeropage.mode = 0;
 	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+	res = uffdio_zeropage.zeropage;
 	if (ret) {
 		/* real retval in ufdio_zeropage.zeropage */
 		if (has_zeropage) {
-			if (uffdio_zeropage.zeropage == -EEXIST) {
-				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
-				exit(1);
-			} else {
-				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
-					uffdio_zeropage.zeropage);
-				exit(1);
-			}
-		} else {
-			if (uffdio_zeropage.zeropage != -EINVAL) {
-				fprintf(stderr,
-					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
-					uffdio_zeropage.zeropage);
-				exit(1);
-			}
-		}
+			uffd_error(res, "UFFDIO_ZEROPAGE %s",
+				   res == -EEXIST ? "-EEXIST" : "error");
+		} else if (res != -EINVAL)
+			uffd_error(res, "UFFDIO_ZEROPAGE not -EINVAL");
 	} else if (has_zeropage) {
-		if (uffdio_zeropage.zeropage != page_size) {
-			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
-				uffdio_zeropage.zeropage); exit(1);
+		if (res != page_size) {
+			uffd_error(res, "UFFDIO_ZEROPAGE unexpected");
 		} else {
 			if (test_uffdio_zeropage_eexist && retry) {
 				test_uffdio_zeropage_eexist = false;
@@ -1014,11 +1006,8 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 			}
 			return 1;
 		}
-	} else {
-		fprintf(stderr,
-			"UFFDIO_ZEROPAGE succeeded %Ld\n",
-			uffdio_zeropage.zeropage); exit(1);
-	}
+	} else
+		uffd_error(res, "UFFDIO_ZEROPAGE succeeded");
 
 	return 0;
 }
-- 
cgit v1.2.3


From 164c50be2878f4caf6d7973e8e0e438f182f4ded Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:02 -0800
Subject: userfaultfd/selftests: always dump something in modes

Patch series "userfaultfd: selftests: Small fixes".

Some very trivial fixes that I kept locally to userfaultfd selftest
program.

This patch (of 3):

BOUNCE_POLL is a special bit that if cleared it means "READ" instead.
Dump that too otherwise we'll see tests with empty modes.

Link: https://lkml.kernel.org/r/20201208024709.7701-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20201208024709.7701-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index a7059a6bc215..7fc1eaf4ceaa 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -1291,6 +1291,8 @@ static int userfaultfd_stress(void)
 			printf(" ver");
 		if (bounces & BOUNCE_POLL)
 			printf(" poll");
+		else
+			printf(" read");
 		printf(", ");
 		fflush(stdout);
 
-- 
cgit v1.2.3


From 1e17a24edf9bef891bbdd02617eaab4fa6efcd7f Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:05 -0800
Subject: userfaultfd/selftests: fix retval check for userfaultfd_open()

userfaultfd_open() returns 1 for errors rather than negatives.  Fix it on
all the callers so when UFFDIO_API failed the test will bail out.

Link: https://lkml.kernel.org/r/20201208024709.7701-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 7fc1eaf4ceaa..6e482e2f198c 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -1029,7 +1029,7 @@ static int userfaultfd_zeropage_test(void)
 	if (uffd_test_ops->release_pages(area_dst))
 		return 1;
 
-	if (userfaultfd_open(0) < 0)
+	if (userfaultfd_open(0))
 		return 1;
 	uffdio_register.range.start = (unsigned long) area_dst;
 	uffdio_register.range.len = nr_pages * page_size;
@@ -1079,7 +1079,7 @@ static int userfaultfd_events_test(void)
 
 	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
 		UFFD_FEATURE_EVENT_REMOVE;
-	if (userfaultfd_open(features) < 0)
+	if (userfaultfd_open(features))
 		return 1;
 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
 
@@ -1151,7 +1151,7 @@ static int userfaultfd_sig_test(void)
 		return 1;
 
 	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
-	if (userfaultfd_open(features) < 0)
+	if (userfaultfd_open(features))
 		return 1;
 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
 
@@ -1231,7 +1231,7 @@ static int userfaultfd_stress(void)
 	if (!area_dst)
 		return 1;
 
-	if (userfaultfd_open(0) < 0)
+	if (userfaultfd_open(0))
 		return 1;
 
 	count_verify = malloc(nr_pages * sizeof(unsigned long long));
-- 
cgit v1.2.3


From d9f411bacfa0c3d0d97580a66f88e70f92bcf58e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:08 -0800
Subject: userfaultfd/selftests: hint the test runner on required privilege

Now userfaultfd test program requires either root or ptrace privilege due
to the signal/event tests.  When UFFDIO_API failed, hint the test runner
about this fact verbosely.

Link: https://lkml.kernel.org/r/20201208024709.7701-4-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 6e482e2f198c..9d8650d4ba5a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -794,7 +794,8 @@ static int userfaultfd_open(int features)
 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = features;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
-		fprintf(stderr, "UFFDIO_API\n");
+		fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to "
+			"run with either root or ptrace capability.\n");
 		return 1;
 	}
 	if (uffdio_api.api != UFFD_API) {
-- 
cgit v1.2.3


From febebaf366868a4204deb3955ef5dda17f676fc1 Mon Sep 17 00:00:00 2001
From: Francis Laniel <laniel_francis@privacyrequired.com>
Date: Tue, 15 Dec 2020 20:43:54 -0800
Subject: drivers/misc/lkdtm: add new file in LKDTM to test fortified strscpy

This new test ensures that fortified strscpy has the same behavior than
vanilla strscpy (e.g.  returning -E2BIG when src content is truncated).
Finally, it generates a crash at runtime because there is a write overflow
in destination string.

Link: https://lkml.kernel.org/r/20201122162451.27551-5-laniel_francis@privacyrequired.com
Signed-off-by: Francis Laniel <laniel_francis@privacyrequired.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/lkdtm/tests.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 74a8d329a72c..92ba4cc41314 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
+FORTIFIED_STRSCPY
-- 
cgit v1.2.3