summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/uapi/linux/bpf.h36
-rw-r--r--net/core/filter.c70
-rw-r--r--tools/include/uapi/linux/bpf.h36
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c74
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c95
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c97
7 files changed, 386 insertions, 24 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 85cfdffde182..146c742f1d49 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3121,6 +3121,38 @@ union bpf_attr {
* 0 on success, or a negative error in case of failure:
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * Description
+ * Return the cgroup v2 id of the socket *sk*.
+ *
+ * *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ * returned from **bpf_sk_lookup_xxx**\ (),
+ * **bpf_sk_fullsock**\ (), etc. The format of returned id is
+ * same as in **bpf_skb_cgroup_id**\ ().
+ *
+ * This helper is available only if the kernel was compiled with
+ * the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *sk* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *sk*, then return value will be same as that
+ * of **bpf_sk_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *sk*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_sk_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3250,7 +3282,9 @@ union bpf_attr {
FN(sk_assign), \
FN(ktime_get_boot_ns), \
FN(seq_printf), \
- FN(seq_write),
+ FN(seq_write), \
+ FN(sk_cgroup_id), \
+ FN(sk_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 1fe8c0c2d408..5815902bb617 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4003,16 +4003,22 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
};
#ifdef CONFIG_SOCK_CGROUP_DATA
+static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
+{
+ struct cgroup *cgrp;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ return cgroup_id(cgrp);
+}
+
BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
{
struct sock *sk = skb_to_full_sk(skb);
- struct cgroup *cgrp;
if (!sk || !sk_fullsock(sk))
return 0;
- cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return cgroup_id(cgrp);
+ return __bpf_sk_cgroup_id(sk);
}
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4022,16 +4028,12 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
- ancestor_level)
+static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
+ int ancestor_level)
{
- struct sock *sk = skb_to_full_sk(skb);
struct cgroup *ancestor;
struct cgroup *cgrp;
- if (!sk || !sk_fullsock(sk))
- return 0;
-
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ancestor = cgroup_ancestor(cgrp, ancestor_level);
if (!ancestor)
@@ -4040,6 +4042,17 @@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
return cgroup_id(ancestor);
}
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+ ancestor_level)
+{
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
.func = bpf_skb_ancestor_cgroup_id,
.gpl_only = false,
@@ -4047,6 +4060,31 @@ static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
+
+BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
+{
+ return __bpf_sk_cgroup_id(sk);
+}
+
+static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
+ .func = bpf_sk_cgroup_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCKET,
+};
+
+BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
+{
+ return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
+static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+ .func = bpf_sk_ancestor_cgroup_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCKET,
+ .arg2_type = ARG_ANYTHING,
+};
#endif
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -6157,8 +6195,22 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
+ case BPF_FUNC_skb_ancestor_cgroup_id:
+ return &bpf_skb_ancestor_cgroup_id_proto;
+ case BPF_FUNC_sk_cgroup_id:
+ return &bpf_sk_cgroup_id_proto;
+ case BPF_FUNC_sk_ancestor_cgroup_id:
+ return &bpf_sk_ancestor_cgroup_id_proto;
#endif
#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_skc_lookup_tcp_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
case BPF_FUNC_get_listener_sock:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 85cfdffde182..146c742f1d49 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3121,6 +3121,38 @@ union bpf_attr {
* 0 on success, or a negative error in case of failure:
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * Description
+ * Return the cgroup v2 id of the socket *sk*.
+ *
+ * *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ * returned from **bpf_sk_lookup_xxx**\ (),
+ * **bpf_sk_fullsock**\ (), etc. The format of returned id is
+ * same as in **bpf_skb_cgroup_id**\ ().
+ *
+ * This helper is available only if the kernel was compiled with
+ * the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *sk* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *sk*, then return value will be same as that
+ * of **bpf_sk_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *sk*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_sk_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3250,7 +3282,9 @@ union bpf_attr {
FN(sk_assign), \
FN(ktime_get_boot_ns), \
FN(seq_printf), \
- FN(seq_write),
+ FN(seq_write), \
+ FN(sk_cgroup_id), \
+ FN(sk_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 0ff64b70b746..999a775484c1 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -4,10 +4,14 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+
+#include <sys/epoll.h>
+
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include "bpf_util.h"
#include "network_helpers.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
@@ -77,9 +81,7 @@ static const size_t timeo_optlen = sizeof(timeo_sec);
int connect_to_fd(int family, int type, int server_fd)
{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
+ int fd, save_errno;
fd = socket(family, type, 0);
if (fd < 0) {
@@ -87,24 +89,70 @@ int connect_to_fd(int family, int type, int server_fd)
return -1;
}
- if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, timeo_optlen)) {
+ if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
+ save_errno = errno;
+ close(fd);
+ errno = save_errno;
+ return -1;
+ }
+
+ return fd;
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int save_errno;
+
+ if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+ timeo_optlen)) {
log_err("Failed to set SO_RCVTIMEO");
- goto out;
+ return -1;
}
if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
log_err("Failed to get server addr");
- goto out;
+ return -1;
}
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server with family %d", family);
- goto out;
+ if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (errno != EINPROGRESS) {
+ save_errno = errno;
+ log_err("Failed to connect to server");
+ errno = save_errno;
+ }
+ return -1;
}
- return fd;
+ return 0;
+}
+
+int connect_wait(int fd)
+{
+ struct epoll_event ev = {}, events[2];
+ int timeout_ms = 1000;
+ int efd, nfd;
+
+ efd = epoll_create1(EPOLL_CLOEXEC);
+ if (efd < 0) {
+ log_err("Failed to open epoll fd");
+ return -1;
+ }
+
+ ev.events = EPOLLRDHUP | EPOLLOUT;
+ ev.data.fd = fd;
+
+ if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
+ close(efd);
+ return -1;
+ }
+
+ nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
+ if (nfd < 0)
+ log_err("Failed to wait for I/O event on epoll fd=%d", efd);
-out:
- close(fd);
- return -1;
+ close(efd);
+ return nfd;
}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index a0be7db4f67d..86914e6e7b53 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -35,5 +35,7 @@ extern struct ipv6_packet pkt_v6;
int start_server(int family, int type);
int connect_to_fd(int family, int type, int server_fd);
+int connect_fd_to_fd(int client_fd, int server_fd);
+int connect_wait(int client_fd);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
new file mode 100644
index 000000000000..059047af7df3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "cgroup_skb_sk_lookup_kern.skel.h"
+
+static void run_lookup_test(__u16 *g_serv_port, int out_sk)
+{
+ int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
+ struct sockaddr_in6 addr = {};
+ socklen_t addr_len = sizeof(addr);
+ __u32 duration = 0;
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ return;
+
+ err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto cleanup;
+
+ *g_serv_port = addr.sin6_port;
+
+ /* Client outside of test cgroup should fail to connect by timeout. */
+ err = connect_fd_to_fd(out_sk, serv_sk);
+ if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
+ "unexpected result err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ err = connect_wait(out_sk);
+ if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
+ goto cleanup;
+
+ /* Client inside test cgroup should connect just fine. */
+ in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+ if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
+ goto cleanup;
+
+ serv_in_sk = accept(serv_sk, NULL, NULL);
+ if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
+ goto cleanup;
+
+cleanup:
+ close(serv_in_sk);
+ close(in_sk);
+ close(serv_sk);
+}
+
+static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
+{
+ struct cgroup_skb_sk_lookup_kern *skel;
+ struct bpf_link *link;
+ __u32 duration = 0;
+ int cgfd = -1;
+
+ skel = cgroup_skb_sk_lookup_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ cgfd = test__join_cgroup(cg_path);
+ if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
+ goto cleanup;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
+ if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ run_lookup_test(&skel->bss->g_serv_port, out_sk);
+
+ bpf_link__destroy(link);
+
+cleanup:
+ close(cgfd);
+ cgroup_skb_sk_lookup_kern__destroy(skel);
+}
+
+void test_cgroup_skb_sk_lookup(void)
+{
+ const char *cg_path = "/foo";
+ int out_sk;
+
+ /* Create a socket before joining testing cgroup so that its cgroup id
+ * differs from that of testing cgroup. Moving selftests process to
+ * testing cgroup won't change cgroup id of an already created socket.
+ */
+ out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (CHECK_FAIL(out_sk < 0))
+ return;
+
+ run_cgroup_bpf_test(cg_path, out_sk);
+
+ close(out_sk);
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
new file mode 100644
index 000000000000..3f757e30d7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+__u16 g_serv_port = 0;
+
+static inline void set_ip(__u32 *dst, const struct in6_addr *src)
+{
+ dst[0] = src->in6_u.u6_addr32[0];
+ dst[1] = src->in6_u.u6_addr32[1];
+ dst[2] = src->in6_u.u6_addr32[2];
+ dst[3] = src->in6_u.u6_addr32[3];
+}
+
+static inline void set_tuple(struct bpf_sock_tuple *tuple,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
+ set_ip(tuple->ipv6.saddr, &ip6h->daddr);
+ set_ip(tuple->ipv6.daddr, &ip6h->saddr);
+ tuple->ipv6.sport = tcph->dest;
+ tuple->ipv6.dport = tcph->source;
+}
+
+static inline int is_allowed_peer_cg(struct __sk_buff *skb,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
+ __u64 cgid, acgid, peer_cgid, peer_acgid;
+ struct bpf_sock_tuple tuple;
+ size_t tuple_len = sizeof(tuple.ipv6);
+ struct bpf_sock *peer_sk;
+
+ set_tuple(&tuple, ip6h, tcph);
+
+ peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!peer_sk)
+ return 0;
+
+ cgid = bpf_skb_cgroup_id(skb);
+ peer_cgid = bpf_sk_cgroup_id(peer_sk);
+
+ acgid = bpf_skb_ancestor_cgroup_id(skb, 2);
+ peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2);
+
+ bpf_sk_release(peer_sk);
+
+ return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_lookup(struct __sk_buff *skb)
+{
+ __u32 serv_port_key = 0;
+ struct ipv6hdr ip6h;
+ struct tcphdr tcph;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 1;
+
+ /* For SYN packets coming to listening socket skb->remote_port will be
+ * zero, so IPv6/TCP headers are loaded to identify remote peer
+ * instead.
+ */
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 1;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 1;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph)))
+ return 1;
+
+ if (!g_serv_port)
+ return 0;
+
+ if (tcph.dest != g_serv_port)
+ return 1;
+
+ return is_allowed_peer_cg(skb, &ip6h, &tcph);
+}