15 files changed, 4931 insertions, 402 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 67ca6c69376c..43d34a5c3ec5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -95,7 +95,7 @@ EXAMPLES
 **# bpftool prog show**
 ::
 
-  10: xdp  name some_prog  tag 005a3d2123620c8b
+  10: xdp  name some_prog  tag 005a3d2123620c8b  gpl
 	loaded_at Sep 29/20:11  uid 0
 	xlated 528B  jited 370B  memlock 4096B  map_ids 10
 
@@ -108,6 +108,7 @@ EXAMPLES
                 "id": 10,
                 "type": "xdp",
                 "tag": "005a3d2123620c8b",
+                "gpl_compatible": true,
                 "loaded_at": "Sep 29/20:11",
                 "uid": 0,
                 "bytes_xlated": 528,
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 548adb9b7317..e71a0a11afde 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -235,6 +235,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
 		     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+	jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
+
 	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
 	if (info->load_time) {
@@ -295,6 +297,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	printf("tag ");
 	fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
 	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+	printf("%s", info->gpl_compatible ? "  gpl" : "");
 	printf("\n");
 
 	if (info->load_time) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c8383a289f7b..da77a9388947 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -377,403 +377,1396 @@ union bpf_attr {
 	};
 } __attribute__((aligned(8)));
 
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- *     Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- *     Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- *     Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- *     Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ *     $ ./scripts/bpf_helpers_doc.py \
+ *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ *     $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Perform a lookup in *map* for an entry associated to *key*.
+ * 	Return
+ * 		Map value associated to *key*, or **NULL** if no entry was
+ * 		found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * 	Description
+ * 		Add or update the value of the entry associated to *key* in
+ * 		*map* with *value*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * 		**BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY**  (all
+ * 		elements always exist), the helper would return an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Delete entry with *key* from *map*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		For tracing programs, safely attempt to read *size* bytes from
+ * 		address *src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_ktime_get_ns(void)
- *     Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *     Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *     Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *     Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *     store bytes into packet
- *     @skb: pointer to skb
- *     @offset: offset within packet from skb->mac_header
- *     @from: pointer where to copy bytes from
- *     @len: number of bytes to store into packet
- *     @flags: bit 0 - if true, recompute skb->csum
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *     recompute IP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where IP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *     recompute TCP/UDP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where TCP/UDP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             bit 4 - is pseudo header
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *     jump into another BPF program
- *     @ctx: context pointer passed to next program
- *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: 32-bit index inside array that selects specific program to run
- *     Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *     redirect to another netdev
- *     @skb: pointer to skb
- *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: 0 on success or negative error
+ * 	Description
+ * 		Return the time elapsed since system boot, in nanoseconds.
+ * 	Return
+ * 		Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * 	Description
+ * 		This helper is a "printk()-like" facility for debugging. It
+ * 		prints a message defined by format *fmt* (of size *fmt_size*)
+ * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * 		available. It can take up to three additional **u64**
+ * 		arguments (as an eBPF helpers, the total number of arguments is
+ * 		limited to five).
+ *
+ * 		Each time the helper is called, it appends a line to the trace.
+ * 		The format of the trace is customizable, and the exact output
+ * 		one will get depends on the options set in
+ * 		*\/sys/kernel/debug/tracing/trace_options* (see also the
+ * 		*README* file under the same directory). However, it usually
+ * 		defaults to something like:
+ *
+ * 		::
+ *
+ * 			telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * 		In the above:
+ *
+ * 			* ``telnet`` is the name of the current task.
+ * 			* ``470`` is the PID of the current task.
+ * 			* ``001`` is the CPU number on which the task is
+ * 			  running.
+ * 			* In ``.N..``, each character refers to a set of
+ * 			  options (whether irqs are enabled, scheduling
+ * 			  options, whether hard/softirqs are running, level of
+ * 			  preempt_disabled respectively). **N** means that
+ * 			  **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * 			  are set.
+ * 			* ``419421.045894`` is a timestamp.
+ * 			* ``0x00000001`` is a fake value used by BPF for the
+ * 			  instruction pointer register.
+ * 			* ``<formatted msg>`` is the message formatted with
+ * 			  *fmt*.
+ *
+ * 		The conversion specifiers supported by *fmt* are similar, but
+ * 		more limited than for printk(). They are **%d**, **%i**,
+ * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * 		of field, padding with zeroes, etc.) is available, and the
+ * 		helper will return **-EINVAL** (but print nothing) if it
+ * 		encounters an unknown specifier.
+ *
+ * 		Also, note that **bpf_trace_printk**\ () is slow, and should
+ * 		only be used for debugging purposes. For this reason, a notice
+ * 		bloc (spanning several lines) is printed to kernel logs and
+ * 		states that the helper should not be used "for production use"
+ * 		the first time this helper is used (or more precisely, when
+ * 		**trace_printk**\ () buffers are allocated). For passing values
+ * 		to user space, perf events should be preferred.
+ * 	Return
+ * 		The number of bytes written to the buffer, or a negative error
+ * 		in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * 	Description
+ * 		Get a pseudo-random number.
+ *
+ * 		From a security point of view, this helper uses its own
+ * 		pseudo-random internal state, and cannot be used to infer the
+ * 		seed of other random functions in the kernel. However, it is
+ * 		essential to note that the generator used by the helper is not
+ * 		cryptographically secure.
+ * 	Return
+ * 		A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * 	Description
+ * 		Get the SMP (symmetric multiprocessing) processor id. Note that
+ * 		all programs run with preemption disabled, which means that the
+ * 		SMP processor id is stable during all the execution of the
+ * 		program.
+ * 	Return
+ * 		The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * 	Description
+ * 		Store *len* bytes from address *from* into the packet
+ * 		associated to *skb*, at *offset*. *flags* are a combination of
+ * 		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * 		checksum for the packet after storing the bytes) and
+ * 		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * 		**->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * 	Description
+ * 		Recompute the layer 3 (e.g. IP) checksum for the packet
+ * 		associated to *skb*. Computation is incremental, so the helper
+ * 		must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored in *size*.
+ * 		Alternatively, it is possible to store the difference between
+ * 		the previous and the new values of the header field in *to*, by
+ * 		setting *from* and *size* to 0. For both methods, *offset*
+ * 		indicates the location of the IP checksum within the packet.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * 	Description
+ * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * 		packet associated to *skb*. Computation is incremental, so the
+ * 		helper must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored on the lowest
+ * 		four bits of *flags*. Alternatively, it is possible to store
+ * 		the difference between the previous and the new values of the
+ * 		header field in *to*, by setting *from* and the four lowest
+ * 		bits of *flags* to 0. For both methods, *offset* indicates the
+ * 		location of the IP checksum within the packet. In addition to
+ * 		the size of the field, *flags* can be added (bitwise OR) actual
+ * 		flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * 		untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * 		for updates resulting in a null checksum the value is set to
+ * 		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * 		the checksum is to be computed against a pseudo-header.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * 	Description
+ * 		This special helper is used to trigger a "tail call", or in
+ * 		other words, to jump into another eBPF program. The same stack
+ * 		frame is used (but values on stack and in registers for the
+ * 		caller are not accessible to the callee). This mechanism allows
+ * 		for program chaining, either for raising the maximum number of
+ * 		available eBPF instructions, or to execute given programs in
+ * 		conditional blocks. For security reasons, there is an upper
+ * 		limit to the number of successive tail calls that can be
+ * 		performed.
+ *
+ * 		Upon call of this helper, the program attempts to jump into a
+ * 		program referenced at index *index* in *prog_array_map*, a
+ * 		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * 		*ctx*, a pointer to the context.
+ *
+ * 		If the call succeeds, the kernel immediately runs the first
+ * 		instruction of the new program. This is not a function call,
+ * 		and it never returns to the previous program. If the call
+ * 		fails, then the helper has no effect, and the caller continues
+ * 		to run its subsequent instructions. A call can fail if the
+ * 		destination program for the jump does not exist (i.e. *index*
+ * 		is superior to the number of entries in *prog_array_map*), or
+ * 		if the maximum number of tail calls has been reached for this
+ * 		chain of programs. This limit is defined in the kernel by the
+ * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * 		which is currently set to 32.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * 	Description
+ * 		Clone and redirect the packet associated to *skb* to another
+ * 		net device of index *ifindex*. Both ingress and egress
+ * 		interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * 		value in *flags* is used to make the distinction (ingress path
+ * 		is selected if the flag is present, egress path otherwise).
+ * 		This is the only flag supported for now.
+ *
+ * 		In comparison with **bpf_redirect**\ () helper,
+ * 		**bpf_clone_redirect**\ () has the associated cost of
+ * 		duplicating the packet buffer, but this can be executed out of
+ * 		the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * 		efficient, but it is handled through an action code where the
+ * 		redirection happens only after the eBPF program has returned.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
- *     Return: current->tgid << 32 | current->pid
+ * 	Return
+ * 		A 64-bit integer containing the current tgid and pid, and
+ * 		created as such:
+ * 		*current_task*\ **->tgid << 32 \|**
+ * 		*current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
- *     Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *     stores current->comm into buf
- *     Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *     retrieve a proc's classid
- *     @skb: pointer to skb
- *     Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *     retrieve or populate tunnel metadata
- *     @skb: pointer to skb
- *     @key: pointer to 'struct bpf_tunnel_key'
- *     @size: size of 'struct bpf_tunnel_key'
- *     @flags: room for future extensions
- *     Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *     read perf event counter value
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *     redirect to another netdev
- *     @ifindex: ifindex of the net device
- *     @flags:
- *	  cls_bpf:
- *          bit 0 - if set, redirect to ingress instead of egress
- *          other bits - reserved
- *	  xdp_bpf:
- *	    all bits - reserved
- *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *     redirect to endpoint in map
- *     @map: pointer to dev map
- *     @key: index in map to lookup
- *     @flags: --
- *     Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *     retrieve a dst's tclassid
- *     @skb: pointer to skb
- *     Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *     output perf raw sample
- *     @ctx: struct pt_regs*
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @data: data on stack to be output as raw data
- *     @size: size of data
- *     Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *     walk user or kernel stack and return id
- *     @ctx: struct pt_regs*
- *     @map: pointer to stack_trace map
- *     @flags: bits 0-7 - numer of stack frames to skip
- *             bit 8 - collect user stack instead of kernel
- *             bit 9 - compare stacks by hash only
- *             bit 10 - if two different stacks hash into the same stackid
- *                      discard old
- *             other bits - reserved
- *     Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *     calculate csum diff
- *     @from: raw from buffer
- *     @from_size: length of from buffer
- *     @to: raw to buffer
- *     @to_size: length of to buffer
- *     @seed: optional seed
- *     Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *     retrieve tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *     populate tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *     Change protocol of the skb. Currently supported is v4 -> v6,
- *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *     program is expected to fill the new headers via skb_store_bytes
- *     and lX_csum_replace.
- *     @skb: pointer to skb
- *     @proto: new skb->protocol type
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *     Change packet type of skb.
- *     @skb: pointer to skb
- *     @type: new skb->pkt_type type
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *     Check cgroup2 membership of skb
- *     @skb: pointer to skb
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 skb failed the cgroup2 descendant test
- *       == 1 skb succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *     Retrieve and possibly recalculate skb->hash.
- *     @skb: pointer to skb
- *     Return: hash
+ * 	Return
+ * 		A 64-bit integer containing the current GID and UID, and
+ * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * 	Description
+ * 		Copy the **comm** attribute of the current task into *buf* of
+ * 		*size_of_buf*. The **comm** attribute contains the name of
+ * 		the executable (excluding the path) for the current task. The
+ * 		*size_of_buf* must be strictly positive. On success, the
+ * 		helper makes sure that the *buf* is NUL-terminated. On failure,
+ * 		it is filled with zeroes.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the classid for the current task, i.e. for the net_cls
+ * 		cgroup to which *skb* belongs.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress.
+ *
+ * 		The net_cls cgroup provides an interface to tag network packets
+ * 		based on a user-provided identifier for all traffic coming from
+ * 		the tasks belonging to the related cgroup. See also the related
+ * 		kernel documentation, available from the Linux sources in file
+ * 		*Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * 		The Linux kernel has two versions for cgroups: there are
+ * 		cgroups v1 and cgroups v2. Both are available to users, who can
+ * 		use a mixture of them, but note that the net_cls cgroup is for
+ * 		cgroup v1 only. This makes it incompatible with BPF programs
+ * 		run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * 		only hold data for one version of cgroups at a time).
+ *
+ * 		This helper is only available is the kernel was compiled with
+ * 		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * 		"**y**" or to "**m**".
+ * 	Return
+ * 		The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * 	Description
+ * 		Push a *vlan_tci* (VLAN tag control information) of protocol
+ * 		*vlan_proto* to the packet associated to *skb*, then update
+ * 		the checksum. Note that if *vlan_proto* is different from
+ * 		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * 		be **ETH_P_8021Q**.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * 	Description
+ * 		Pop a VLAN header from the packet associated to *skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Get tunnel metadata. This helper takes a pointer *key* to an
+ * 		empty **struct bpf_tunnel_key** of **size**, that will be
+ * 		filled with tunnel metadata for the packet associated to *skb*.
+ * 		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * 		indicates that the tunnel is based on IPv6 protocol instead of
+ * 		IPv4.
+ *
+ * 		The **struct bpf_tunnel_key** is an object that generalizes the
+ * 		principal parameters used by various tunneling protocols into a
+ * 		single struct. This way, it can be used to easily make a
+ * 		decision based on the contents of the encapsulation header,
+ * 		"summarized" in this struct. In particular, it holds the IP
+ * 		address of the remote end (IPv4 or IPv6, depending on the case)
+ * 		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * 		this struct exposes the *key*\ **->tunnel_id**, which is
+ * 		generally mapped to a VNI (Virtual Network Identifier), making
+ * 		it programmable together with the **bpf_skb_set_tunnel_key**\
+ * 		() helper.
+ *
+ * 		Let's imagine that the following code is part of a program
+ * 		attached to the TC ingress interface, on one end of a GRE
+ * 		tunnel, and is supposed to filter out all messages coming from
+ * 		remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * 		::
+ *
+ * 			int ret;
+ * 			struct bpf_tunnel_key key = {};
+ * 			
+ * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			if (ret < 0)
+ * 				return TC_ACT_SHOT;	// drop packet
+ * 			
+ * 			if (key.remote_ipv4 != 0x0a000001)
+ * 				return TC_ACT_SHOT;	// drop packet
+ * 			
+ * 			return TC_ACT_OK;		// accept packet
+ *
+ * 		This interface can also be used with all encapsulation devices
+ * 		that can operate in "collect metadata" mode: instead of having
+ * 		one network device per specific configuration, the "collect
+ * 		metadata" mode only requires a single device where the
+ * 		configuration can be extracted from this helper.
+ *
+ * 		This can be used together with various tunnels such as VXLan,
+ * 		Geneve, GRE or IP in IP (IPIP).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Populate tunnel metadata for packet associated to *skb.* The
+ * 		tunnel metadata is set to the contents of *key*, of *size*. The
+ * 		*flags* can be set to a combination of the following values:
+ *
+ * 		**BPF_F_TUNINFO_IPV6**
+ * 			Indicate that the tunnel is based on IPv6 protocol
+ * 			instead of IPv4.
+ * 		**BPF_F_ZERO_CSUM_TX**
+ * 			For IPv4 packets, add a flag to tunnel metadata
+ * 			indicating that checksum computation should be skipped
+ * 			and checksum set to zeroes.
+ * 		**BPF_F_DONT_FRAGMENT**
+ * 			Add a flag to tunnel metadata indicating that the
+ * 			packet should not be fragmented.
+ * 		**BPF_F_SEQ_NUMBER**
+ * 			Add a flag to tunnel metadata indicating that a
+ * 			sequence number should be added to tunnel header before
+ * 			sending the packet. This flag was added for GRE
+ * 			encapsulation, but might be used with other protocols
+ * 			as well in the future.
+ *
+ * 		Here is a typical usage on the transmit path:
+ *
+ * 		::
+ *
+ * 			struct bpf_tunnel_key key;
+ * 			     populate key ...
+ * 			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Read the value of a perf event counter. This helper relies on a
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * 		the perf event counter is selected when *map* is updated with
+ * 		perf event file descriptors. The *map* is an array whose size
+ * 		is the number of available CPUs, and each cell contains a value
+ * 		relative to one CPU. The value to retrieve is indicated by
+ * 		*flags*, that contains the index of the CPU to look up, masked
+ * 		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		Note that before Linux 4.13, only hardware perf event can be
+ * 		retrieved.
+ *
+ * 		Also, be aware that the newer helper
+ * 		**bpf_perf_event_read_value**\ () is recommended over
+ * 		**bpf_perf_event_read*\ () in general. The latter has some ABI
+ * 		quirks where error and counter value are used as a return code
+ * 		(which is wrong to do since ranges may overlap). This issue is
+ * 		fixed with bpf_perf_event_read_value(), which at the same time
+ * 		provides more features over the **bpf_perf_event_read**\ ()
+ * 		interface. Please refer to the description of
+ * 		**bpf_perf_event_read_value**\ () for details.
+ * 	Return
+ * 		The value of the perf event counter read from the map, or a
+ * 		negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_clone_redirect**\
+ * 		(), except that the packet is not cloned, which provides
+ * 		increased performance.
+ *
+ * 		Except for XDP, both ingress and egress interfaces can be used
+ * 		for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * 		to make the distinction (ingress path is selected if the flag
+ * 		is present, egress path otherwise). Currently, XDP only
+ * 		supports redirection to the egress interface, and accepts no
+ * 		flag at all.
+ *
+ * 		The same effect can be attained with the more generic
+ * 		**bpf_redirect_map**\ (), which requires specific maps to be
+ * 		used but offers better performance.
+ * 	Return
+ * 		For XDP, the helper returns **XDP_REDIRECT** on success or
+ * 		**XDP_ABORTED** on error. For other program types, the values
+ * 		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * 		error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the realm or the route, that is to say the
+ * 		**tclassid** field of the destination for the *skb*. The
+ * 		indentifier retrieved is a user-provided tag, similar to the
+ * 		one used with the net_cls cgroup (see description for
+ * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * 		held by a route (a destination entry), not by a task.
+ *
+ * 		Retrieving this identifier works with the clsact TC egress hook
+ * 		(see also **tc-bpf(8)**), or alternatively on conventional
+ * 		classful egress qdiscs, but not on TC ingress path. In case of
+ * 		clsact TC egress hook, this has the advantage that, internally,
+ * 		the destination entry has not been dropped yet in the transmit
+ * 		path. Therefore, the destination entry does not need to be
+ * 		artificially held via **netif_keep_dst**\ () for a classful
+ * 		qdisc until the *skb* is freed.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		**CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * 	Return
+ * 		The realm of the route for the packet associated to *skb*, or 0
+ * 		if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * 	Description
+ * 		Write raw *data* blob into a special BPF perf event held by
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * 		event must have the following attributes: **PERF_SAMPLE_RAW**
+ * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * 		The *flags* are used to indicate the index in *map* for which
+ * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * 		to indicate that the index of the current CPU core should be
+ * 		used.
+ *
+ * 		The value to write, of *size*, is passed through eBPF stack and
+ * 		pointed by *data*.
+ *
+ * 		The context of the program *ctx* needs also be passed to the
+ * 		helper.
+ *
+ * 		On user space, a program willing to read the values needs to
+ * 		call **perf_event_open**\ () on the perf event (either for
+ * 		one or for all CPUs) and to store the file descriptor into the
+ * 		*map*. This must be done before the eBPF program can send data
+ * 		into it. An example is available in file
+ * 		*samples/bpf/trace_output_user.c* in the Linux kernel source
+ * 		tree (the eBPF program counterpart is in
+ * 		*samples/bpf/trace_output_kern.c*).
+ *
+ * 		**bpf_perf_event_output**\ () achieves better performance
+ * 		than **bpf_trace_printk**\ () for sharing data with user
+ * 		space, and is much better suitable for streaming data from eBPF
+ * 		programs.
+ *
+ * 		Note that this helper is not restricted to tracing use cases
+ * 		and can be used with programs attached to TC or XDP as well,
+ * 		where it allows for passing data to user space listeners. Data
+ * 		can be:
+ *
+ * 		* Only custom structs,
+ * 		* Only the packet payload, or
+ * 		* A combination of both.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * 	Description
+ * 		This helper was provided as an easy way to load data from a
+ * 		packet. It can be used to load *len* bytes from *offset* from
+ * 		the packet associated to *skb*, into the buffer pointed by
+ * 		*to*.
+ *
+ * 		Since Linux 4.7, usage of this helper has mostly been replaced
+ * 		by "direct packet access", enabling packet data to be
+ * 		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * 		pointing respectively to the first byte of packet data and to
+ * 		the byte after the last byte of packet data. However, it
+ * 		remains useful if one wishes to read large quantities of data
+ * 		at once from a packet into the eBPF stack.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Walk a user or a kernel stack and return its id. To achieve
+ * 		this, the helper needs *ctx*, which is a pointer to the context
+ * 		on which the tracing program is executed, and a pointer to a
+ * 		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		a combination of the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_FAST_STACK_CMP**
+ * 			Compare stacks by hash only.
+ * 		**BPF_F_REUSE_STACKID**
+ * 			If two different stacks hash into the same *stackid*,
+ * 			discard the old one.
+ *
+ * 		The stack id retrieved is a 32 bit long integer handle which
+ * 		can be further combined with other data (including other stack
+ * 		ids) and used as a key into maps. This can be useful for
+ * 		generating a variety of graphs (such as flame graphs or off-cpu
+ * 		graphs).
+ *
+ * 		For walking a stack, this helper is an improvement over
+ * 		**bpf_probe_read**\ (), which can be used with unrolled loops
+ * 		but is not efficient and consumes a lot of eBPF instructions.
+ * 		Instead, **bpf_get_stackid**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * 	Return
+ * 		The positive or null stack id on success, or a negative error
+ * 		in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * 	Description
+ * 		Compute a checksum difference, from the raw buffer pointed by
+ * 		*from*, of length *from_size* (that must be a multiple of 4),
+ * 		towards the raw buffer pointed by *to*, of size *to_size*
+ * 		(same remark). An optional *seed* can be added to the value
+ * 		(this can be cascaded, the seed may come from a previous call
+ * 		to the helper).
+ *
+ * 		This is flexible enough to be used in several ways:
+ *
+ * 		* With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * 		  checksum, it can be used when pushing new data.
+ * 		* With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * 		  checksum, it can be used when removing data from a packet.
+ * 		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * 		  can be used to compute a diff. Note that *from_size* and
+ * 		  *to_size* do not need to be equal.
+ *
+ * 		This helper can be used in combination with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * 		which one can feed in the difference computed with
+ * 		**bpf_csum_diff**\ ().
+ * 	Return
+ * 		The checksum result, or a negative error code in case of
+ * 		failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Retrieve tunnel options metadata for the packet associated to
+ * 		*skb*, and store the raw tunnel option data to the buffer *opt*
+ * 		of *size*.
+ *
+ * 		This helper can be used with encapsulation devices that can
+ * 		operate in "collect metadata" mode (please refer to the related
+ * 		note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * 		more details). A particular example where this can be used is
+ * 		in combination with the Geneve encapsulation protocol, where it
+ * 		allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * 		and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * 		the eBPF program. This allows for full customization of these
+ * 		headers.
+ * 	Return
+ * 		The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Set tunnel options metadata for the packet associated to *skb*
+ * 		to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * 	Description
+ * 		Change the protocol of the *skb* to *proto*. Currently
+ * 		supported are transition from IPv4 to IPv6, and from IPv6 to
+ * 		IPv4. The helper takes care of the groundwork for the
+ * 		transition, including resizing the socket buffer. The eBPF
+ * 		program is expected to fill the new headers, if any, via
+ * 		**skb_store_bytes**\ () and to recompute the checksums with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * 		(). The main case for this helper is to perform NAT64
+ * 		operations out of an eBPF program.
+ *
+ * 		Internally, the GSO type is marked as dodgy so that headers are
+ * 		checked and segments are recalculated by the GSO/GRO engine.
+ * 		The size for GSO target is adapted as well.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * 	Description
+ * 		Change the packet type for the packet associated to *skb*. This
+ * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * 		the eBPF program does not have a write access to *skb*\
+ * 		**->pkt_type** beside this helper. Using a helper here allows
+ * 		for graceful handling of errors.
+ *
+ * 		The major use case is to change incoming *skb*s to
+ * 		**PACKET_HOST** in a programmatic way instead of having to
+ * 		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * 		example.
+ *
+ * 		Note that *type* only allows certain values. At this time, they
+ * 		are:
+ *
+ * 		**PACKET_HOST**
+ * 			Packet is for us.
+ * 		**PACKET_BROADCAST**
+ * 			Send packet to all.
+ * 		**PACKET_MULTICAST**
+ * 			Send packet to group.
+ * 		**PACKET_OTHERHOST**
+ * 			Send packet to someone else.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether *skb* is a descendant of the cgroup2 held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if the *skb* failed the cgroup2 descendant test.
+ * 		* 1, if the *skb* succeeded the cgroup2 descendant test.
+ * 		* A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * 		not set, in particular if the hash was cleared due to mangling,
+ * 		recompute this hash. Later accesses to the hash can be done
+ * 		directly with *skb*\ **->hash**.
+ *
+ * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * 		prototype with **bpf_skb_change_proto**\ (), or calling
+ * 		**bpf_skb_store_bytes**\ () with the
+ * 		**BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * 		the hash and to trigger a new computation for the next call to
+ * 		**bpf_get_hash_recalc**\ ().
+ * 	Return
+ * 		The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
- *     Returns current task_struct
- *     Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *     safely attempt to write to a location
- *     @dst: destination address in userspace
- *     @src: source address on stack
- *     @len: number of bytes to copy
- *     Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *     Check cgroup2 membership of current task
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 current failed the cgroup2 descendant test
- *       == 1 current succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *     The helper will resize the skb to the given new size, to be used f.e.
- *     with control messages.
- *     @skb: pointer to skb
- *     @len: new skb length
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *     The helper will pull in non-linear data in case the skb is non-linear
- *     and not all of len are part of the linear section. Only needed for
- *     read/write with direct packet access.
- *     @skb: pointer to skb
- *     @len: len to make read/writeable
- *     Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *     @skb: pointer to skb
- *     @csum: csum to add
- *     Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *     Invalidate current skb->hash.
- *     @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *     Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *     Grows headroom of skb and adjusts MAC header offset accordingly.
- *     Will extends/reallocae as required automatically.
- *     May change skb data pointer and will thus invalidate any check
- *     performed for direct packet access.
- *     @skb: pointer to skb
- *     @len: length of header to be pushed in front
- *     @flags: Flags (unused for now)
- *     Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *     Adjust the xdp_md.data by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data
- *     Return: 0 on success or negative on error
+ * 	Return
+ * 		A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * 	Description
+ * 		Attempt in a safe way to write *len* bytes from the buffer
+ * 		*src* to *dst* in memory. It only works for threads that are in
+ * 		user context, and *dst* must be a valid user space address.
+ *
+ * 		This helper should not be used to implement any kind of
+ * 		security mechanism because of TOC-TOU attacks, but rather to
+ * 		debug, divert, and manipulate execution of semi-cooperative
+ * 		processes.
+ *
+ * 		Keep in mind that this feature is meant for experiments, and it
+ * 		has a risk of crashing the system and running programs.
+ * 		Therefore, when an eBPF program using this helper is attached,
+ * 		a warning including PID and process name is printed to kernel
+ * 		logs.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether the probe is being run is the context of a given
+ * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if the *skb* task belongs to the cgroup2.
+ * 		* 1, if the *skb* task does not belong to the cgroup2.
+ * 		* A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Resize (trim or grow) the packet associated to *skb* to the
+ * 		new *len*. The *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		The basic idea is that the helper performs the needed work to
+ * 		change the size of the packet, then the eBPF program rewrites
+ * 		the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * 		**bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * 		and others. This helper is a slow path utility intended for
+ * 		replies with control messages. And because it is targeted for
+ * 		slow path, the helper itself can afford to be slow: it
+ * 		implicitly linearizes, unclones and drops offloads from the
+ * 		*skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * 	Description
+ * 		Pull in non-linear data in case the *skb* is non-linear and not
+ * 		all of *len* are part of the linear section. Make *len* bytes
+ * 		from *skb* readable and writable. If a zero value is passed for
+ * 		*len*, then the whole length of the *skb* is pulled.
+ *
+ * 		This helper is only needed for reading and writing with direct
+ * 		packet access.
+ *
+ * 		For direct packet access, testing that offsets to access
+ * 		are within packet boundaries (test on *skb*\ **->data_end**) is
+ * 		susceptible to fail if offsets are invalid, or if the requested
+ * 		data is in non-linear parts of the *skb*. On failure the
+ * 		program can just bail out, or in the case of a non-linear
+ * 		buffer, use a helper to make the data available. The
+ * 		**bpf_skb_load_bytes**\ () helper is a first solution to access
+ * 		the data. Another one consists in using **bpf_skb_pull_data**
+ * 		to pull in once the non-linear parts, then retesting and
+ * 		eventually access the data.
+ *
+ * 		At the same time, this also makes sure the *skb* is uncloned,
+ * 		which is a necessary condition for direct write. As this needs
+ * 		to be an invariant for the write part only, the verifier
+ * 		detects writes and adds a prologue that is calling
+ * 		**bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * 		the very beginning in case it is indeed cloned.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * 	Description
+ * 		Add the checksum *csum* into *skb*\ **->csum** in case the
+ * 		driver has supplied a checksum for the entire packet into that
+ * 		field. Return an error otherwise. This helper is intended to be
+ * 		used in combination with **bpf_csum_diff**\ (), in particular
+ * 		when the checksum needs to be updated after data has been
+ * 		written into the packet through direct packet access.
+ * 	Return
+ * 		The checksum on success, or a negative error code in case of
+ * 		failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * 	Description
+ * 		Invalidate the current *skb*\ **->hash**. It can be used after
+ * 		mangling on headers through direct packet access, in order to
+ * 		indicate that the hash is outdated and to trigger a
+ * 		recalculation the next time the kernel tries to access this
+ * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * 	Description
+ * 		Return the id of the current NUMA node. The primary use case
+ * 		for this helper is the selection of sockets for the local NUMA
+ * 		node, when the program is attached to sockets using the
+ * 		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * 		but the helper is also available to other eBPF program types,
+ * 		similarly to **bpf_get_smp_processor_id**\ ().
+ * 	Return
+ * 		The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Grows headroom of packet associated to *skb* and adjusts the
+ * 		offset of the MAC header accordingly, adding *len* bytes of
+ * 		space. It automatically extends and reallocates memory as
+ * 		required.
+ *
+ * 		This helper can be used on a layer 3 *skb* to push a MAC header
+ * 		for redirection into a layer 2 device.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * 		it is possible to use a negative value for *delta*. This helper
+ * 		can be used to prepare the packet for pushing or popping
+ * 		headers.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *     Copy a NUL terminated string from unsafe address. In case the string
- *     length is smaller than size, the target is not padded with further NUL
- *     bytes. In case the string length is larger than size, just count-1
- *     bytes are copied and the last byte is set to NUL.
- *     @dst: destination address
- *     @size: maximum number of bytes to copy, including the trailing NUL
- *     @unsafe_ptr: unsafe address
- *     Return:
- *       > 0 length of the string including the trailing NUL on success
- *       < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- *     Get the cookie for the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
- *     field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- *     Get the owner uid of the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- *     Set full skb->hash.
- *     @skb: pointer to skb
- *     @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls setsockopt. Not all opts are available, only those with
- *     integer optvals plus TCP_CONGESTION.
- *     Supported levels: SOL_SOCKET and IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: SOL_SOCKET or IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls getsockopt. Not all opts are available.
- *     Supported levels: IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- *     Set callback flags for sock_ops
- *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- *     @flags: flags value
- *     Return: 0 for no error
- *             -EINVAL if there is no full tcp socket
- *             bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- *     Grow or shrink room in sk_buff.
- *     @skb: pointer to skb
- *     @len_diff: (signed) amount of room to grow/shrink
- *     @mode: operation mode (enum bpf_adj_room_mode)
- *     @flags: reserved for future use
- *     Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- *     Redirect skb to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *	@skops: pointer to bpf_sock_ops
- *	@map: pointer to sockmap to update
- *	@key: key to insert/update sock in map
- *	@flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *     Adjust the xdp_md.data_meta by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data_meta
- *     Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *     read perf event counter value and perf event enabled/running time
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *     read perf prog attached perf event counter and enabled/running time
- *     @ctx: pointer to ctx
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *	@pt_regs: pointer to struct pt_regs
- *	@rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *     Redirect msg to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *     Bind socket to address. Only binding to IP is supported, no port can be
- *     set in addr.
- *     @ctx: pointer to context of type bpf_sock_addr
- *     @addr: pointer to struct sockaddr to bind socket to
- *     @addr_len: length of sockaddr structure
- *     Return: 0 on success or negative error code
- *
- * int bpf_xdp_adjust_tail(xdp_md, delta)
- *     Adjust the xdp_md.data_end by delta. Only shrinking of packet's
- *     size is supported.
- *     @xdp_md: pointer to xdp_md
- *     @delta: A negative integer to be added to xdp_md.data_end
- *     Return: 0 on success or negative on error
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
+ * 				                             ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read()** helper here instead
+ * 		to read the string would require to estimate the length at
+ * 		compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * 	Description
+ * 		If the **struct sk_buff** pointed by *skb* has a known socket,
+ * 		retrieve the cookie (generated by the kernel) of this socket.
+ * 		If no cookie has been set yet, generate a new cookie. Once
+ * 		generated, the socket cookie remains stable for the life of the
+ * 		socket. This helper can be useful for monitoring per socket
+ * 		networking traffic statistics as it provides a unique socket
+ * 		identifier per namespace.
+ * 	Return
+ * 		A 8-byte long non-decreasing number on success, or 0 if the
+ * 		socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * 	Return
+ * 		The owner UID of the socket associated to *skb*. If the socket
+ * 		is **NULL**, or if it is not a full socket (i.e. if it is a
+ * 		time-wait or a request socket instead), **overflowuid** value
+ * 		is returned (note that **overflowuid** might also be the actual
+ * 		UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * 	Description
+ * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * 		to value *hash*.
+ * 	Return
+ * 		0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **setsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **setsockopt(2)** for more information.
+ * 		The option value of length *optlen* is pointed by *optval*.
+ *
+ * 		This helper actually implements a subset of **setsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
+ * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * 		  **TCP_BPF_SNDCWND_CLAMP**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * 	Description
+ * 		Grow or shrink the room for data in the packet associated to
+ * 		*skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * 		There is a single supported mode at this time:
+ *
+ * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * 		  (room space is added or removed below the layer 3 header).
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the endpoint referenced by *map* at
+ * 		index *key*. Depending on its type, this *map* can contain
+ * 		references to net devices (for forwarding packets through other
+ * 		ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * 		but this is only implemented for native XDP (with driver
+ * 		support) as of this writing).
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		When used to redirect packets to net devices, this helper
+ * 		provides a high performance increase over **bpf_redirect**\ ().
+ * 		This is due to various implementation details of the underlying
+ * 		mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * 		() tries to send packet as a "bulk" to the device.
+ * 	Return
+ * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * 	Description
+ * 		Add an entry to, or update a *map* referencing sockets. The
+ * 		*skops* is used as a new value for the entry associated to
+ * 		*key*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		If the *map* has eBPF programs (parser and verdict), those will
+ * 		be inherited by the socket being added. If the socket is
+ * 		already attached to eBPF programs, this results in an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * 		*delta* (which can be positive or negative). Note that this
+ * 		operation modifies the address stored in *xdp_md*\ **->data**,
+ * 		so the latter must be loaded only after the helper has been
+ * 		called.
+ *
+ * 		The use of *xdp_md*\ **->data_meta** is optional and programs
+ * 		are not required to use it. The rationale is that when the
+ * 		packet is processed with XDP (e.g. as DoS filter), it is
+ * 		possible to push further meta data along with it before passing
+ * 		to the stack, and to give the guarantee that an ingress eBPF
+ * 		program attached as a TC classifier on the same device can pick
+ * 		this up for further post-processing. Since TC works with socket
+ * 		buffers, it remains possible to set from XDP the **mark** or
+ * 		**priority** pointers, or other pointers for the socket buffer.
+ * 		Having this scratch space generic and programmable allows for
+ * 		more flexibility as the user is free to store whatever meta
+ * 		data they need.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		Read the value of a perf event counter, and store it into *buf*
+ * 		of size *buf_size*. This helper relies on a *map* of type
+ * 		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * 		counter is selected when *map* is updated with perf event file
+ * 		descriptors. The *map* is an array whose size is the number of
+ * 		available CPUs, and each cell contains a value relative to one
+ * 		CPU. The value to retrieve is indicated by *flags*, that
+ * 		contains the index of the CPU to look up, masked with
+ * 		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		This helper behaves in a way close to
+ * 		**bpf_perf_event_read**\ () helper, save that instead of
+ * 		just returning the value observed, it fills the *buf*
+ * 		structure. This allows for additional data to be retrieved: in
+ * 		particular, the enabled and running times (in *buf*\
+ * 		**->enabled** and *buf*\ **->running**, respectively) are
+ * 		copied. In general, **bpf_perf_event_read_value**\ () is
+ * 		recommended over **bpf_perf_event_read**\ (), which has some
+ * 		ABI issues and provides fewer functionalities.
+ *
+ * 		These values are interesting, because hardware PMU (Performance
+ * 		Monitoring Unit) counters are limited resources. When there are
+ * 		more PMU based perf events opened than available counters,
+ * 		kernel will multiplex these events so each event gets certain
+ * 		percentage (but not all) of the PMU time. In case that
+ * 		multiplexing happens, the number of samples or counter value
+ * 		will not reflect the case compared to when no multiplexing
+ * 		occurs. This makes comparison between different runs difficult.
+ * 		Typically, the counter value should be normalized before
+ * 		comparing to other experiments. The usual normalization is done
+ * 		as follows.
+ *
+ * 		::
+ *
+ * 			normalized_counter = counter * t_enabled / t_running
+ *
+ * 		Where t_enabled is the time enabled for event and t_running is
+ * 		the time running for event since last normalization. The
+ * 		enabled and running times are accumulated since the perf event
+ * 		open. To achieve scaling factor between two invocations of an
+ * 		eBPF program, users can can use CPU id as the key (which is
+ * 		typical for perf array usage model) to remember the previous
+ * 		value and do the calculation inside the eBPF program.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		For en eBPF program attached to a perf event, retrieve the
+ * 		value of the event counter associated to *ctx* and store it in
+ * 		the structure pointed by *buf* and of size *buf_size*. Enabled
+ * 		and running times are also stored in the structure (see
+ * 		description of helper **bpf_perf_event_read_value**\ () for
+ * 		more details).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **getsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **getsockopt(2)** for more information.
+ * 		The retrieved value is stored in the structure pointed by
+ * 		*opval* and of length *optlen*.
+ *
+ * 		This helper actually implements a subset of **getsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **IPPROTO_TCP**, which supports *optname*
+ * 		  **TCP_CONGESTION**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * 	Description
+ * 		Used for error injection, this helper uses kprobes to override
+ * 		the return value of the probed function, and to set it to *rc*.
+ * 		The first argument is the context *regs* on which the kprobe
+ * 		works.
+ *
+ * 		This helper works by setting setting the PC (program counter)
+ * 		to an override function which is run in place of the original
+ * 		probed function. This means the probed function is not run at
+ * 		all. The replacement function just returns with the required
+ * 		value.
+ *
+ * 		This helper has security implications, and thus is subject to
+ * 		restrictions. It is only available if the kernel was compiled
+ * 		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * 		option, and in this case it only works on functions tagged with
+ * 		**ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * 		Also, the helper is only available for the architectures having
+ * 		the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * 		x86 architecture is the only one to support this feature.
+ * 	Return
+ * 		0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * 	Description
+ * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * 		for the full TCP socket associated to *bpf_sock_ops* to
+ * 		*argval*.
+ *
+ * 		The primary use of this field is to determine if there should
+ * 		be calls to eBPF programs of type
+ * 		**BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * 		code. A program of the same type can change its value, per
+ * 		connection and as necessary, when the connection is
+ * 		established. This field is directly accessible for reading, but
+ * 		this helper must be used for updates in order to return an
+ * 		error if an eBPF program tries to set a callback that is not
+ * 		supported in the current kernel.
+ *
+ * 		The supported callback values that *argval* can combine are:
+ *
+ * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * 		Here are some examples of where one could call such eBPF
+ * 		program:
+ *
+ * 		* When RTO fires.
+ * 		* When a packet is retransmitted.
+ * 		* When the connection terminates.
+ * 		* When a packet is sent.
+ * 		* When a packet is received.
+ * 	Return
+ * 		Code **-EINVAL** if the socket is not a full TCP socket;
+ * 		otherwise, a positive number containing the bits that could not
+ * 		be set is returned (which comes down to 0 if all bits were set
+ * 		as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		This helper is used in programs implementing policies at the
+ * 		socket level. If the message *msg* is allowed to pass (i.e. if
+ * 		the verdict eBPF program returns **SK_PASS**), redirect it to
+ * 		the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, apply the verdict of the eBPF program to
+ * 		the next *bytes* (number of bytes) of message *msg*.
+ *
+ * 		For example, this helper can be used in the following cases:
+ *
+ * 		* A single **sendmsg**\ () or **sendfile**\ () system call
+ * 		  contains multiple logical messages that the eBPF program is
+ * 		  supposed to read and for which it should apply a verdict.
+ * 		* An eBPF program only cares to read the first *bytes* of a
+ * 		  *msg*. If the message has a large payload, then setting up
+ * 		  and calling the eBPF program repeatedly for all bytes, even
+ * 		  though the verdict is already known, would create unnecessary
+ * 		  overhead.
+ *
+ * 		When called from within an eBPF program, the helper sets a
+ * 		counter internal to the BPF infrastructure, that is used to
+ * 		apply the last verdict to the next *bytes*. If *bytes* is
+ * 		smaller than the current data being processed from a
+ * 		**sendmsg**\ () or **sendfile**\ () system call, the first
+ * 		*bytes* will be sent and the eBPF program will be re-run with
+ * 		the pointer for start of data pointing to byte number *bytes*
+ * 		**+ 1**. If *bytes* is larger than the current data being
+ * 		processed, then the eBPF verdict will be applied to multiple
+ * 		**sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * 		consumed.
+ *
+ * 		Note that if a socket closes with the internal counter holding
+ * 		a non-zero value, this is not a problem because data is not
+ * 		being buffered for *bytes* and is sent as it is received.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, prevent the execution of the verdict eBPF
+ * 		program for message *msg* until *bytes* (byte number) have been
+ * 		accumulated.
+ *
+ * 		This can be used when one needs a specific number of bytes
+ * 		before a verdict can be assigned, even if the data spans
+ * 		multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * 		case would be a user calling **sendmsg**\ () repeatedly with
+ * 		1-byte long message segments. Obviously, this is bad for
+ * 		performance, but it is still valid. If the eBPF program needs
+ * 		*bytes* bytes to validate a header, this helper can be used to
+ * 		prevent the eBPF program to be called again until *bytes* have
+ * 		been accumulated.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * 	Description
+ * 		For socket policies, pull in non-linear data from user space
+ * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * 		**->data_end** to *start* and *end* bytes offsets into *msg*,
+ * 		respectively.
+ *
+ * 		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * 		*msg* it can only parse data that the (**data**, **data_end**)
+ * 		pointers have already consumed. For **sendmsg**\ () hooks this
+ * 		is likely the first scatterlist element. But for calls relying
+ * 		on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * 		be the range (**0**, **0**) because the data is shared with
+ * 		user space and by default the objective is to avoid allowing
+ * 		user space to modify data while (or after) eBPF verdict is
+ * 		being decided. This helper can be used to pull in data and to
+ * 		set the start and end pointer to given values. Data will be
+ * 		copied if necessary (i.e. if data was not linear and if start
+ * 		and end pointers do not point to the same chunk).
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * 	Description
+ * 		Bind the socket associated to *ctx* to the address pointed by
+ * 		*addr*, of length *addr_len*. This allows for making outgoing
+ * 		connection from the desired IP address, which can be useful for
+ * 		example when all processes inside a cgroup should use one
+ * 		single IP address on a host that has multiple IP configured.
+ *
+ * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * 		**AF_INET6**). Looking for a free port to bind to can be
+ * 		expensive, therefore binding to port is not permitted by the
+ * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * 		must be set to zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * 		only possible to shrink the packet as of this writing,
+ * 		therefore *delta* must be a negative integer.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * 	Description
+ * 		Retrieve the XFRM state (IP transform framework, see also
+ * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * 		The retrieved value is stored in the **struct bpf_xfrm_state**
+ * 		pointed by *xfrm_state* and of length *size*.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		**CONFIG_XFRM** configuration option.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -841,7 +1834,8 @@ union bpf_attr {
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
 	FN(bind),			\
-	FN(xdp_adjust_tail),
+	FN(xdp_adjust_tail),		\
+	FN(skb_get_xfrm_state),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -947,6 +1941,19 @@ struct bpf_tunnel_key {
 	__u32 tunnel_label;
 };
 
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+	__u32 reqid;
+	__u32 spi;	/* Stored in network byte order */
+	__u16 family;
+	union {
+		__u32 remote_ipv4;	/* Stored in network byte order */
+		__u32 remote_ipv6[4];	/* Stored in network byte order */
+	};
+};
+
 /* Generic BPF return codes which all BPF program types may support.
  * The values are binary compatible with their TC_ACT_* counter-part to
  * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1037,6 +2044,7 @@ struct bpf_prog_info {
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
+	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
 } __attribute__((aligned(8)));
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 74a30b1090df..bcb56ee47014 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -6,9 +6,7 @@
 #include <linux/types.h>
 
 #define BTF_MAGIC	0xeB9F
-#define BTF_MAGIC_SWAP	0x9FeB
 #define BTF_VERSION	1
-#define BTF_FLAGS_COMPR	0x01
 
 struct btf_header {
 	__u16	magic;
@@ -43,7 +41,7 @@ struct btf_header {
 #define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)
 
 struct btf_type {
-	__u32 name;
+	__u32 name_off;
 	/* "info" bits arrangement
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
@@ -105,7 +103,7 @@ struct btf_type {
  * info in "struct btf_type").
  */
 struct btf_enum {
-	__u32	name;
+	__u32	name_off;
 	__s32	val;
 };
 
@@ -122,7 +120,7 @@ struct btf_array {
  * "struct btf_type").
  */
 struct btf_member {
-	__u32	name;
+	__u32	name_off;
 	__u32	type;
 	__u32	offset;	/* offset in bits */
 };
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 58b6255abc7a..2bac710e3194 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -281,7 +281,7 @@ int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
 
 	for (i = 1; i <= btf->nr_types; i++) {
 		const struct btf_type *t = btf->types[i];
-		const char *name = btf_name_by_offset(btf, t->name);
+		const char *name = btf_name_by_offset(btf, t->name_off);
 
 		if (name && !strcmp(type_name, name))
 			return i;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6513e0b08795..7bcdca13083a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1961,8 +1961,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-						 enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+					   enum bpf_attach_type type)
 {
 	prog->expected_attach_type = type;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index d6ac4fa6f472..197f9ce2248c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -193,6 +193,8 @@ int bpf_program__set_sched_act(struct bpf_program *prog);
 int bpf_program__set_xdp(struct bpf_program *prog);
 int bpf_program__set_perf_event(struct bpf_program *prog);
 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+					   enum bpf_attach_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 5e1ab2f0eb79..3e3b3ced3f7c 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -15,3 +15,4 @@ test_libbpf_open
 test_sock
 test_sock_addr
 urandom_read
+test_btf
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0b72cc7596f1..b64a7a39cbc8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -24,7 +24,7 @@ urandom_read: urandom_read.c
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
-	test_sock test_sock_addr test_btf
+	test_sock test_btf test_sockmap
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
 	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-	test_btf_haskv.o test_btf_nokv.o
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -40,10 +40,11 @@ TEST_PROGS := test_kmod.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
 	test_offload.py \
-	test_sock_addr.sh
+	test_sock_addr.sh \
+	test_tunnel.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
 
 include ../lib.mk
 
@@ -56,6 +57,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 9271576bdc8f..69d7b918e66a 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -98,7 +98,9 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_tail;
-
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+				     int size, int flags) =
+	(void *) BPF_FUNC_skb_get_xfrm_state;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000000..29c022d23f4e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1464 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_FILENAME "test_sockmap_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"cgroup",	required_argument,	NULL, 'c' },
+	{"rate",	required_argument,	NULL, 'r' },
+	{"verbose",	no_argument,		NULL, 'v' },
+	{"iov_count",	required_argument,	NULL, 'i' },
+	{"length",	required_argument,	NULL, 'l' },
+	{"test",	required_argument,	NULL, 't' },
+	{"data_test",   no_argument,		NULL, 'd' },
+	{"txmsg",		no_argument,	&txmsg_pass,  1  },
+	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
+	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
+	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
+	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
+	{"txmsg_apply",	required_argument,	NULL, 'a'},
+	{"txmsg_cork",	required_argument,	NULL, 'k'},
+	{"txmsg_start", required_argument,	NULL, 's'},
+	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
+	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+	printf(" options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)\n",
+				*long_options[i].flag);
+		else
+			printf(" -%c\n", long_options[i].val);
+	}
+	printf("\n");
+}
+
+static int sockmap_init_sockets(int verbose)
+{
+	int i, err, one = 1;
+	struct sockaddr_in addr;
+	int *fds[4] = {&s1, &s2, &c1, &c2};
+
+	s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+	/* Init sockets */
+	for (i = 0; i < 4; i++) {
+		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (*fds[i] < 0) {
+			perror("socket s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Allow reuse */
+	for (i = 0; i < 2; i++) {
+		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			perror("setsockopt failed()");
+			return errno;
+		}
+	}
+
+	/* Non-blocking sockets */
+	for (i = 0; i < 2; i++) {
+		err = ioctl(*fds[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			perror("ioctl s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Bind server sockets */
+	memset(&addr, 0, sizeof(struct sockaddr_in));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	addr.sin_port = htons(S1_PORT);
+	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s2 failed()\n");
+		return errno;
+	}
+
+	/* Listen server sockets */
+	addr.sin_port = htons(S1_PORT);
+	err = listen(s1, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = listen(s2, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	/* Initiate Connect */
+	addr.sin_port = htons(S1_PORT);
+	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c2 failed()\n");
+		return errno;
+	} else if (err < 0) {
+		err = 0;
+	}
+
+	/* Accept Connecrtions */
+	p1 = accept(s1, NULL, NULL);
+	if (p1 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	p2 = accept(s2, NULL, NULL);
+	if (p2 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	if (verbose) {
+		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+			c1, s1, c2, s2);
+	}
+	return 0;
+}
+
+struct msg_stats {
+	size_t bytes_sent;
+	size_t bytes_recvd;
+	struct timespec start;
+	struct timespec end;
+};
+
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+	int iov_count;
+	int iov_length;
+	int rate;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+			     struct msg_stats *s,
+			     struct sockmap_options *opt)
+{
+	bool drop = opt->drop_expected;
+	unsigned char k = 0;
+	FILE *file;
+	int i, fp;
+
+	file = fopen(".sendpage_tst.tmp", "w+");
+	for (i = 0; i < iov_length * cnt; i++, k++)
+		fwrite(&k, sizeof(char), 1, file);
+	fflush(file);
+	fseek(file, 0, SEEK_SET);
+	fclose(file);
+
+	fp = open(".sendpage_tst.tmp", O_RDONLY);
+	clock_gettime(CLOCK_MONOTONIC, &s->start);
+	for (i = 0; i < cnt; i++) {
+		int sent = sendfile(fd, fp, NULL, iov_length);
+
+		if (!drop && sent < 0) {
+			perror("send loop error:");
+			close(fp);
+			return sent;
+		} else if (drop && sent >= 0) {
+			printf("sendpage loop error expected: %i\n", sent);
+			close(fp);
+			return -EIO;
+		}
+
+		if (sent > 0)
+			s->bytes_sent += sent;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &s->end);
+	close(fp);
+	return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
+{
+	struct msghdr msg = {0};
+	int err, i, flags = MSG_NOSIGNAL;
+	struct iovec *iov;
+	unsigned char k;
+	bool data_test = opt->data_test;
+	bool drop = opt->drop_expected;
+
+	iov = calloc(iov_count, sizeof(struct iovec));
+	if (!iov)
+		return errno;
+
+	k = 0;
+	for (i = 0; i < iov_count; i++) {
+		unsigned char *d = calloc(iov_length, sizeof(char));
+
+		if (!d) {
+			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+			goto out_errno;
+		}
+		iov[i].iov_base = d;
+		iov[i].iov_len = iov_length;
+
+		if (data_test && tx) {
+			int j;
+
+			for (j = 0; j < iov_length; j++)
+				d[j] = k++;
+		}
+	}
+
+	msg.msg_iov = iov;
+	msg.msg_iovlen = iov_count;
+	k = 0;
+
+	if (tx) {
+		clock_gettime(CLOCK_MONOTONIC, &s->start);
+		for (i = 0; i < cnt; i++) {
+			int sent = sendmsg(fd, &msg, flags);
+
+			if (!drop && sent < 0) {
+				perror("send loop error:");
+				goto out_errno;
+			} else if (drop && sent >= 0) {
+				printf("send loop error expected: %i\n", sent);
+				errno = -EIO;
+				goto out_errno;
+			}
+			if (sent > 0)
+				s->bytes_sent += sent;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	} else {
+		int slct, recv, max_fd = fd;
+		int fd_flags = O_NONBLOCK;
+		struct timeval timeout;
+		float total_bytes;
+		fd_set w;
+
+		fcntl(fd, fd_flags);
+		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+		if (err < 0)
+			perror("recv start time: ");
+		while (s->bytes_recvd < total_bytes) {
+			timeout.tv_sec = 0;
+			timeout.tv_usec = 10;
+
+			/* FD sets */
+			FD_ZERO(&w);
+			FD_SET(fd, &w);
+
+			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+			if (slct == -1) {
+				perror("select()");
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			} else if (!slct) {
+				if (opt->verbose)
+					fprintf(stderr, "unexpected timeout\n");
+				errno = -EIO;
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			}
+
+			recv = recvmsg(fd, &msg, flags);
+			if (recv < 0) {
+				if (errno != EWOULDBLOCK) {
+					clock_gettime(CLOCK_MONOTONIC, &s->end);
+					perror("recv failed()\n");
+					goto out_errno;
+				}
+			}
+
+			s->bytes_recvd += recv;
+
+			if (data_test) {
+				int j;
+
+				for (i = 0; i < msg.msg_iovlen; i++) {
+					unsigned char *d = iov[i].iov_base;
+
+					for (j = 0;
+					     j < iov[i].iov_len && recv; j++) {
+						if (d[j] != k++) {
+							errno = -EIO;
+							fprintf(stderr,
+								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+								i, j, d[j], k - 1, d[j+1], k + 1);
+							goto out_errno;
+						}
+						recv--;
+					}
+				}
+			}
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	}
+
+	for (i = 0; i < iov_count; i++)
+		free(iov[i].iov_base);
+	free(iov);
+	return 0;
+out_errno:
+	for (i = 0; i < iov_count; i++)
+		free(iov[i].iov_base);
+	free(iov);
+	return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(struct sockmap_options *opt)
+{
+	float sent_Bps = 0, recvd_Bps = 0;
+	int rx_fd, txpid, rxpid, err = 0;
+	struct msg_stats s = {0};
+	int iov_count = opt->iov_count;
+	int iov_buf = opt->iov_length;
+	int cnt = opt->rate;
+	int status;
+
+	errno = 0;
+
+	if (opt->base)
+		rx_fd = p1;
+	else
+		rx_fd = p2;
+
+	rxpid = fork();
+	if (rxpid == 0) {
+		if (opt->drop_expected)
+			exit(1);
+
+		if (opt->sendpage)
+			iov_count = 1;
+		err = msg_loop(rx_fd, iov_count, iov_buf,
+			       cnt, &s, false, opt);
+		if (err && opt->verbose)
+			fprintf(stderr,
+				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(p2, SHUT_RDWR);
+		shutdown(p1, SHUT_RDWR);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		exit(1);
+	} else if (rxpid == -1) {
+		perror("msg_loop_rx: ");
+		return errno;
+	}
+
+	txpid = fork();
+	if (txpid == 0) {
+		if (opt->sendpage)
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+		else
+			err = msg_loop(c1, iov_count, iov_buf,
+				       cnt, &s, true, opt);
+
+		if (err)
+			fprintf(stderr,
+				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(c1, SHUT_RDWR);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		exit(1);
+	} else if (txpid == -1) {
+		perror("msg_loop_tx: ");
+		return errno;
+	}
+
+	assert(waitpid(rxpid, &status, 0) == rxpid);
+	assert(waitpid(txpid, &status, 0) == txpid);
+	return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+	struct timeval timeout;
+	char buf[1024] = {0};
+	int sc;
+
+	timeout.tv_sec = 10;
+	timeout.tv_usec = 0;
+
+	/* Ping/Pong data from client to server */
+	sc = send(c1, buf, sizeof(buf), 0);
+	if (sc < 0) {
+		perror("send failed()\n");
+		return sc;
+	}
+
+	do {
+		int s, rc, i, max_fd = p2;
+		fd_set w;
+
+		/* FD sets */
+		FD_ZERO(&w);
+		FD_SET(c1, &w);
+		FD_SET(c2, &w);
+		FD_SET(p1, &w);
+		FD_SET(p2, &w);
+
+		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+		if (s == -1) {
+			perror("select()");
+			break;
+		} else if (!s) {
+			fprintf(stderr, "unexpected timeout\n");
+			break;
+		}
+
+		for (i = 0; i <= max_fd && s > 0; ++i) {
+			if (!FD_ISSET(i, &w))
+				continue;
+
+			s--;
+
+			rc = recv(i, buf, sizeof(buf), 0);
+			if (rc < 0) {
+				if (errno != EWOULDBLOCK) {
+					perror("recv failed()\n");
+					return rc;
+				}
+			}
+
+			if (rc == 0) {
+				close(i);
+				break;
+			}
+
+			sc = send(i, buf, rc, 0);
+			if (sc < 0) {
+				perror("send failed()\n");
+				return sc;
+			}
+		}
+
+		if (rate)
+			sleep(rate);
+
+		if (opt->verbose) {
+			printf(".");
+			fflush(stdout);
+
+		}
+	} while (running);
+
+	return 0;
+}
+
+enum {
+	PING_PONG,
+	SENDMSG,
+	BASE,
+	BASE_SENDPAGE,
+	SENDPAGE,
+};
+
+static int run_options(struct sockmap_options *options, int cg_fd,  int test)
+{
+	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+	/* If base test skip BPF setup */
+	if (test == BASE || test == BASE_SENDPAGE)
+		goto run;
+
+	/* Attach programs to sockmap */
+	err = bpf_prog_attach(prog_fd[0], map_fd[0],
+				BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		fprintf(stderr,
+			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+			prog_fd[0], map_fd[0], err, strerror(errno));
+		return err;
+	}
+
+	err = bpf_prog_attach(prog_fd[1], map_fd[0],
+				BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+	/* Attach to cgroups */
+	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+run:
+	err = sockmap_init_sockets(options->verbose);
+	if (err) {
+		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+		goto out;
+	}
+
+	/* Attach txmsg program to sockmap */
+	if (txmsg_pass)
+		tx_prog_fd = prog_fd[3];
+	else if (txmsg_noisy)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[5];
+	else if (txmsg_redir_noisy)
+		tx_prog_fd = prog_fd[6];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[9];
+	/* apply and cork must be last */
+	else if (txmsg_apply)
+		tx_prog_fd = prog_fd[7];
+	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[8];
+	else
+		tx_prog_fd = 0;
+
+	if (tx_prog_fd) {
+		int redir_fd, i = 0;
+
+		err = bpf_prog_attach(tx_prog_fd,
+				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_redir || txmsg_redir_noisy)
+			redir_fd = c2;
+		else
+			redir_fd = c1;
+
+		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_apply) {
+			err = bpf_map_update_elem(map_fd[3],
+						  &i, &txmsg_apply, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_cork) {
+			err = bpf_map_update_elem(map_fd[4],
+						  &i, &txmsg_cork, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_start) {
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end) {
+			i = 1;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+					p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7],
+						  &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0],
+						  &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+	}
+
+	if (txmsg_drop)
+		options->drop_expected = true;
+
+	if (test == PING_PONG)
+		err = forever_ping_pong(options->rate, options);
+	else if (test == SENDMSG) {
+		options->base = false;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == SENDPAGE) {
+		options->base = false;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else if (test == BASE) {
+		options->base = true;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == BASE_SENDPAGE) {
+		options->base = true;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else
+		fprintf(stderr, "unknown test\n");
+out:
+	/* Detatch and zero all the maps */
+	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	if (tx_prog_fd >= 0)
+		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+	for (i = 0; i < 8; i++) {
+		key = next_key = 0;
+		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+			key = next_key;
+		}
+	}
+
+	close(s1);
+	close(s2);
+	close(p1);
+	close(p2);
+	close(c1);
+	close(c2);
+	return err;
+}
+
+static char *test_to_str(int test)
+{
+	switch (test) {
+	case SENDMSG:
+		return "sendmsg";
+	case SENDPAGE:
+		return "sendpage";
+	}
+	return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+	memset(options, 0, OPTSTRING);
+
+	if (txmsg_pass)
+		strncat(options, "pass,", OPTSTRING);
+	if (txmsg_noisy)
+		strncat(options, "pass_noisy,", OPTSTRING);
+	if (txmsg_redir)
+		strncat(options, "redir,", OPTSTRING);
+	if (txmsg_redir_noisy)
+		strncat(options, "redir_noisy,", OPTSTRING);
+	if (txmsg_drop)
+		strncat(options, "drop,", OPTSTRING);
+	if (txmsg_apply)
+		strncat(options, "apply,", OPTSTRING);
+	if (txmsg_cork)
+		strncat(options, "cork,", OPTSTRING);
+	if (txmsg_start)
+		strncat(options, "start,", OPTSTRING);
+	if (txmsg_end)
+		strncat(options, "end,", OPTSTRING);
+	if (txmsg_ingress)
+		strncat(options, "ingress,", OPTSTRING);
+	if (txmsg_skb)
+		strncat(options, "skb,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+	char *options = calloc(60, sizeof(char));
+	int err;
+
+	if (test == SENDPAGE)
+		opt->sendpage = true;
+	else
+		opt->sendpage = false;
+
+	if (txmsg_drop)
+		opt->drop_expected = true;
+	else
+		opt->drop_expected = false;
+
+	test_options(options);
+
+	fprintf(stdout,
+		"[TEST %i]: (%i, %i, %i, %s, %s): ",
+		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+		test_to_str(test), options);
+	fflush(stdout);
+	err = run_options(opt, cgrp, test);
+	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	test_cnt++;
+	!err ? passed++ : failed++;
+	free(options);
+	return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+	int err = __test_exec(cgrp, SENDMSG, opt);
+
+	if (err)
+		goto out;
+
+	err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+	return err;
+}
+
+static int test_loop(int cgrp)
+{
+	struct sockmap_options opt;
+
+	int err, i, l, r;
+
+	opt.verbose = 0;
+	opt.base = false;
+	opt.sendpage = false;
+	opt.data_test = false;
+	opt.drop_expected = false;
+	opt.iov_count = 0;
+	opt.iov_length = 0;
+	opt.rate = 0;
+
+	r = 1;
+	for (i = 1; i < 100; i += 33) {
+		for (l = 1; l < 100; l += 33) {
+			opt.rate = r;
+			opt.iov_count = i;
+			opt.iov_length = l;
+			err = test_exec(cgrp, &opt);
+			if (err)
+				goto out;
+		}
+	}
+	sched_yield();
+out:
+	return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_skb = 0;
+
+	txmsg_pass = 1;
+	err = test_loop(cgrp);
+	txmsg_pass = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	if (err)
+		goto out;
+
+	txmsg_drop = 1;
+	err = test_loop(cgrp);
+	txmsg_drop = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	txmsg_ingress = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	txmsg_ingress = 0;
+	if (err)
+		goto out;
+out:
+	txmsg_pass = 0;
+	txmsg_redir = 0;
+	txmsg_drop = 0;
+	return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+	int err;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1024;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1024;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 1024;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 256;
+	opt->iov_count = 1024;
+	opt->rate = 10;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->rate = 100;
+	opt->iov_count = 1;
+	opt->iov_length = 5;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+out:
+	sched_yield();
+	return err;
+}
+
+static int test_mixed(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_start = txmsg_end = 0;
+	/* Test small and large iov_count values with pass/redir/apply/cork */
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+out:
+	return err;
+}
+
+static int test_start_end(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err, i;
+
+	/* Test basic start/end with lots of iov_count and iov_lengths */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	err = test_txmsg(cgrp);
+	if (err)
+		goto out;
+
+	/* Test start/end with cork */
+	opt.rate = 16;
+	opt.iov_count = 1;
+	opt.iov_length = 100;
+	txmsg_cork = 1600;
+
+	for (i = 99; i <= 1600; i += 500) {
+		txmsg_start = 0;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork but pull data in middle */
+	for (i = 199; i <= 1600; i += 500) {
+		txmsg_start = 100;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork pulling last sg entry */
+	txmsg_start = 1500;
+	txmsg_end = 1600;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end pull of single byte in last page */
+	txmsg_start = 1111;
+	txmsg_end = 1112;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end < start */
+	txmsg_start = 1111;
+	txmsg_end = 0;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end > data */
+	txmsg_start = 0;
+	txmsg_end = 1601;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with start > data */
+	txmsg_start = 1601;
+	txmsg_end = 1600;
+	err = test_exec(cgrp, &opt);
+
+out:
+	txmsg_start = 0;
+	txmsg_end = 0;
+	sched_yield();
+	return err;
+}
+
+char *map_names[] = {
+	"sock_map",
+	"sock_map_txmsg",
+	"sock_map_redir",
+	"sock_apply_bytes",
+	"sock_cork_bytes",
+	"sock_pull_bytes",
+	"sock_redir_flags",
+	"sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+	BPF_SK_SKB_STREAM_PARSER,
+	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_CGROUP_SOCK_OPS,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SOCK_OPS,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+};
+
+static int populate_progs(void)
+{
+	char *bpf_file = BPF_FILENAME;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int i = 0;
+	long err;
+
+	obj = bpf_object__open(bpf_file);
+	err = libbpf_get_error(obj);
+	if (err) {
+		char err_buf[256];
+
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		printf("Unable to load eBPF objects in file '%s' : %s\n",
+		       bpf_file, err_buf);
+		return -1;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		bpf_program__set_type(prog, prog_type[i]);
+		bpf_program__set_expected_attach_type(prog,
+						      prog_attach_type[i]);
+		i++;
+	}
+
+	i = bpf_object__load(obj);
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		prog_fd[i] = bpf_program__fd(prog);
+		i++;
+	}
+
+	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+		map_fd[i] = bpf_map__fd(maps[i]);
+		if (map_fd[i] < 0) {
+			fprintf(stderr, "load_bpf_file: (%i) %s\n",
+				map_fd[i], strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int test_suite(void)
+{
+	int cg_fd, err;
+
+	err = populate_progs();
+	if (err < 0) {
+		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+		return err;
+	}
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "ERROR: cgroup env failed\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(CG_PATH);
+	if (cg_fd < 0) {
+		fprintf(stderr,
+			"ERROR: (%i) open cg path failed: %s\n",
+			cg_fd, optarg);
+		return cg_fd;
+	}
+
+	/* Tests basic commands and APIs with range of iov values */
+	txmsg_start = txmsg_end = 0;
+	err = test_txmsg(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests interesting combinations of APIs used together */
+	err = test_mixed(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests pull_data API using start/end API */
+	err = test_start_end(cg_fd);
+	if (err)
+		goto out;
+
+out:
+	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+	close(cg_fd);
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+	int iov_count = 1, length = 1024, rate = 1;
+	struct sockmap_options options = {0};
+	int opt, longindex, err, cg_fd = 0;
+	char *bpf_file = BPF_FILENAME;
+	int test = PING_PONG;
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	if (argc < 2)
+		return test_suite();
+
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 's':
+			txmsg_start = atoi(optarg);
+			break;
+		case 'e':
+			txmsg_end = atoi(optarg);
+			break;
+		case 'a':
+			txmsg_apply = atoi(optarg);
+			break;
+		case 'k':
+			txmsg_cork = atoi(optarg);
+			break;
+		case 'c':
+			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+			if (cg_fd < 0) {
+				fprintf(stderr,
+					"ERROR: (%i) open cg path failed: %s\n",
+					cg_fd, optarg);
+				return cg_fd;
+			}
+			break;
+		case 'r':
+			rate = atoi(optarg);
+			break;
+		case 'v':
+			options.verbose = 1;
+			break;
+		case 'i':
+			iov_count = atoi(optarg);
+			break;
+		case 'l':
+			length = atoi(optarg);
+			break;
+		case 'd':
+			options.data_test = true;
+			break;
+		case 't':
+			if (strcmp(optarg, "ping") == 0) {
+				test = PING_PONG;
+			} else if (strcmp(optarg, "sendmsg") == 0) {
+				test = SENDMSG;
+			} else if (strcmp(optarg, "base") == 0) {
+				test = BASE;
+			} else if (strcmp(optarg, "base_sendpage") == 0) {
+				test = BASE_SENDPAGE;
+			} else if (strcmp(optarg, "sendpage") == 0) {
+				test = SENDPAGE;
+			} else {
+				usage(argv);
+				return -1;
+			}
+			break;
+		case 0:
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return -1;
+		}
+	}
+
+	if (!cg_fd) {
+		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+			argv[0]);
+		return -1;
+	}
+
+	err = populate_progs();
+	if (err) {
+		fprintf(stderr, "populate program: (%s) %s\n",
+			bpf_file, strerror(errno));
+		return 1;
+	}
+	running = 1;
+
+	/* catch SIGINT */
+	signal(SIGINT, running_handler);
+
+	options.iov_count = iov_count;
+	options.iov_length = length;
+	options.rate = rate;
+
+	err = run_options(&options, cg_fd, test);
+	close(cg_fd);
+	return err;
+}
+
+void running_handler(int a)
+{
+	running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000000..33de97e2b6b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ *    client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") sock_map = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
+
+	if (lport == 10000)
+		ret = 10;
+	else
+		ret = 1;
+
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 lport, rport;
+	int op, err = 0, index, key, ret;
+
+
+	op = (int) skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (lport == 10000) {
+			ret = 1;
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (bpf_ntohl(rport) == 10001) {
+			ret = 10;
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+	int err1 = -1, err2 = -1, zero = 0, one = 1;
+	int *bytes, *start, *end, len1, len2;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		int err;
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+		   len1, err1, err2);
+	return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1, key = 0;
+	int *start, *end, *f;
+	__u64 flags = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
+
+		int err;
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes) {
+		if (((__u64)data_end - (__u64)data) >= *bytes)
+			return SK_PASS;
+		ret = bpf_msg_cork_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	}
+	return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+
+	return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000000..aeb2901f21f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,729 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+#   The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+#     root namespace   |     at_ns0 namespace
+#                      |
+#      -----------     |     -----------
+#      | tnl dev |     |     | tnl dev |  (overlay network)
+#      -----------     |     -----------
+#      metadata-mode   |     native-mode
+#       with bpf       |
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |  (underlay network)
+#      ----------    peer    ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+#    with remote_ip=172.16.1.200 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link set dev veth1 up mtu 1500
+	ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE key 2 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+		local ::11 remote ::22
+
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV fc80::200/24
+	ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 3
+	fi
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 7
+	fi
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+	# Set static ARP entry here because iptables set-mark works
+	# on L3 packet, as a result not applying to ARP packets,
+	# causing errors at get_tunnel_{key/opt}.
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 4789 gbp remote 172.16.1.200
+	ip netns exec at_ns0 \
+		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external gbp dstport 4789
+	ip link set dev $DEV address 52:54:00:d9:02:00 up
+	ip addr add dev $DEV 10.1.1.200/24
+	arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	#ip -4 addr del 172.16.1.200 dev veth1
+	ip -6 addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external dstport 4789
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 6081 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE dstport 6081 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 \
+		remote ::22     # geneve has no local option
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+test_gre()
+{
+	TYPE=gretap
+	DEV_NS=gretap00
+	DEV=gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+	TYPE=ip6gre
+	DEV_NS=ip6gre00
+	DEV=ip6gre11
+	ret=0
+
+	check $TYPE
+	config_device
+	# reuse the ip6gretap function
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+	TYPE=ip6gretap
+	DEV_NS=ip6gretap00
+	DEV=ip6gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+	TYPE=erspan
+	DEV_NS=erspan00
+	DEV=erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_erspan_tunnel $1
+	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+	TYPE=ip6erspan
+	DEV_NS=ip6erspan00
+	DEV=ip6erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6erspan_tunnel $1
+	attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+	ping6 $PING_ARG ::11
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=vxlan00
+	DEV=vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_vxlan_tunnel
+	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=ip6vxlan00
+	DEV=ip6vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6vxlan_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+	TYPE=geneve
+	DEV_NS=geneve00
+	DEV=geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_geneve_tunnel
+	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+	TYPE=geneve
+	DEV_NS=ip6geneve00
+	DEV=ip6geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6geneve_tunnel
+	attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ipip6tnl00
+	DEV=ipip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+	auth=0x$(printf '1%.0s' {1..40})
+	enc=0x$(printf '2%.0s' {1..32})
+	spi_in_to_out=0x1
+	spi_out_to_in=0x2
+	# at_ns0 namespace
+	# at_ns0 -> root
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+			spi $spi_in_to_out reqid 1 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+			spi $spi_out_to_in reqid 2 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip netns exec at_ns0 \
+		ip addr add dev veth0 10.1.1.100/32
+	ip netns exec at_ns0 \
+		ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+			src 10.1.1.100
+
+	# root namespace
+	# at_ns0 -> root
+	ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+		spi $spi_in_to_out reqid 1 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+		spi $spi_out_to_in reqid 2 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip addr add dev veth1 10.1.1.200/32
+	ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+	config_device
+        #tcpdump -nei veth1 ip &
+	output=$(mktemp)
+	cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
+        setup_xfrm_tunnel
+	tc qdisc add dev veth1 clsact
+	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+		sec xfrm_get_state
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	sleep 1
+	grep "reqid 1" $output
+	check_err $?
+	grep "spi 0x1" $output
+	check_err $?
+	grep "remote ip 0xac100164" $output
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+	DEV=$1
+	SET=$2
+	GET=$3
+	tc qdisc add dev $DEV clsact
+	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+	ip netns delete at_ns0 2> /dev/null
+	ip link del veth1 2> /dev/null
+	ip link del ipip11 2> /dev/null
+	ip link del ipip6tnl11 2> /dev/null
+	ip link del gretap11 2> /dev/null
+	ip link del ip6gre11 2> /dev/null
+	ip link del ip6gretap11 2> /dev/null
+	ip link del vxlan11 2> /dev/null
+	ip link del ip6vxlan11 2> /dev/null
+	ip link del geneve11 2> /dev/null
+	ip link del ip6geneve11 2> /dev/null
+	ip link del erspan11 2> /dev/null
+	ip link del ip6erspan11 2> /dev/null
+}
+
+cleanup_exit()
+{
+	echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+	cleanup
+	exit 0
+}
+
+check()
+{
+	ip link help $1 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP $1: iproute2 not support"
+	cleanup
+	return 1
+	fi
+}
+
+enable_debug()
+{
+	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+bpf_tunnel_test()
+{
+	echo "Testing GRE tunnel..."
+	test_gre
+	echo "Testing IP6GRE tunnel..."
+	test_ip6gre
+	echo "Testing IP6GRETAP tunnel..."
+	test_ip6gretap
+	echo "Testing ERSPAN tunnel..."
+	test_erspan v2
+	echo "Testing IP6ERSPAN tunnel..."
+	test_ip6erspan v2
+	echo "Testing VXLAN tunnel..."
+	test_vxlan
+	echo "Testing IP6VXLAN tunnel..."
+	test_ip6vxlan
+	echo "Testing GENEVE tunnel..."
+	test_geneve
+	echo "Testing IP6GENEVE tunnel..."
+	test_ip6geneve
+	echo "Testing IPIP tunnel..."
+	test_ipip
+	echo "Testing IPIP6 tunnel..."
+	test_ipip6
+	echo "Testing IPSec tunnel..."
+	test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000000..504df69c83df
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+		char fmt[] = "ERROR line:%d ret:%d\n";\
+		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+	} while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt {
+	__be16	opt_class;
+	__u8	type;
+	__u8	length:5;
+	__u8	r3:1;
+	__u8	r2:1;
+	__u8	r1:1;
+	__u8	opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata {
+	__u32     gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "key %d remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+	key.tunnel_label = 0xabcde;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+	md.version = 1;
+	md.u.index = bpf_htonl(123);
+#else
+	__u8 direction = 1;
+	__u8 hwid = 7;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11);
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+	md.u.index = bpf_htonl(123);
+	md.version = 1;
+#else
+	__u8 direction = 0;
+	__u8 hwid = 17;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.gbp);
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret, ret2;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == bpf_htons(5200))
+			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (tcp->dest == bpf_htons(5201))
+			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct ipv6hdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+		key.remote_ipv6[3] = bpf_htonl(1);
+	} else {
+		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
+			ERROR(iph->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == bpf_htons(5200)) {
+			key.remote_ipv6[3] = bpf_htonl(1);
+		} else if (tcp->dest == bpf_htons(5201)) {
+			key.remote_ipv6[3] = bpf_htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+	struct bpf_xfrm_state x;
+	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+	int ret;
+
+	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+	if (ret < 0)
+		return TC_ACT_OK;
+
+	bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+			 bpf_ntohl(x.remote_ipv4));
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3e7718b1a9ae..165e9ddfa446 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -64,6 +64,7 @@ struct bpf_test {
 	struct bpf_insn	insns[MAX_INSNS];
 	int fixup_map1[MAX_FIXUPS];
 	int fixup_map2[MAX_FIXUPS];
+	int fixup_map3[MAX_FIXUPS];
 	int fixup_prog[MAX_FIXUPS];
 	int fixup_map_in_map[MAX_FIXUPS];
 	const char *errstr;
@@ -88,6 +89,11 @@ struct test_val {
 	int foo[MAX_ENTRIES];
 };
 
+struct other_val {
+	long long foo;
+	long long bar;
+};
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -5594,6 +5600,257 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
+		"map lookup helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 8 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map: wrong size",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.fixup_map3 = { 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=0 size=16",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      offsetof(struct other_val, bar)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      sizeof(struct other_val) - 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      offsetof(struct other_val, bar)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      sizeof(struct other_val) - 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, -4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 11 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): wrong max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar) + 1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 11 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=9 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
 		"map element value is preserved across register spilling",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -11533,6 +11790,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 {
 	int *fixup_map1 = test->fixup_map1;
 	int *fixup_map2 = test->fixup_map2;
+	int *fixup_map3 = test->fixup_map3;
 	int *fixup_prog = test->fixup_prog;
 	int *fixup_map_in_map = test->fixup_map_in_map;
 
@@ -11556,6 +11814,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 		} while (*fixup_map2);
 	}
 
+	if (*fixup_map3) {
+		map_fds[1] = create_map(sizeof(struct other_val), 1);
+		do {
+			prog[*fixup_map3].imm = map_fds[1];
+			fixup_map3++;
+		} while (*fixup_map3);
+	}
+
 	if (*fixup_prog) {
 		map_fds[2] = create_prog_array();
 		do {