diff options
-rw-r--r-- | include/net/ipv6_stubs.h | 5 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 10 | ||||
-rw-r--r-- | net/core/filter.c | 18 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 1 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 10 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/fib_lookup.c | 83 |
6 files changed, 120 insertions, 7 deletions
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a7d4a1a69f21..e0aa457f94a9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3264,6 +3264,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6964,6 +6969,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6976,6 +6982,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..3880bf0b740d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6010,7 +6025,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6ad0d6e99b5..6337fb4504fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a7d4a1a69f21..e0aa457f94a9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3264,6 +3264,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6964,6 +6969,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6976,6 +6982,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 2fd05649bad1..4ad4cd69152e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -11,9 +11,13 @@ #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" +#define IPV6_IFACE_ADDR_SEC "cafe::cafe" +#define IPV6_ADDR_DST "face::3" #define IPV6_NUD_FAILED_ADDR "face::1" #define IPV6_NUD_STALE_ADDR "face::2" #define IPV4_IFACE_ADDR "10.0.0.254" +#define IPV4_IFACE_ADDR_SEC "10.1.0.254" +#define IPV4_ADDR_DST "10.2.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" #define IPV4_TBID_ADDR "172.0.0.254" @@ -31,6 +35,7 @@ struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; + const char *expected_src; int lookup_flags; __u32 tbid; __u8 dmac[6]; @@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = { .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, .dmac = DMAC_INIT2, }, + { .desc = "IPv4 set src addr from netdev", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set src addr from netdev", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 set prefsrc addr from route", + .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set prefsrc addr route", + .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, }; static int ifindex; @@ -97,6 +118,13 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for prefsrc IP addr selection */ + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC); + + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC); + /* Setup for tbid lookup tests */ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); @@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; - ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) - return -1; + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) + return -1; + } + return 0; } @@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; - ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) - return -1; + + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) + return -1; + } return 0; } @@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } +static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src) +{ + int ret; + __u32 src6[4]; + __be32 src4; + + switch (fib_params->family) { + case AF_INET6: + ret = inet_pton(AF_INET6, expected_src, src6); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src)); + if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) { + char str_src6[64]; + + inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6, + sizeof(str_src6)); + printf("ipv6 expected %s actual %s ", expected_src, + str_src6); + } + + break; + case AF_INET: + ret = inet_pton(AF_INET, expected_src, &src4); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src"); + + break; + default: + PRINT_FAIL("invalid addr family: %d", fib_params->family); + } +} + void test_fib_lookup(void) { struct bpf_fib_lookup *fib_params; @@ -207,6 +275,9 @@ void test_fib_lookup(void) ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); + if (tests[i].expected_src) + assert_src_ip(fib_params, tests[i].expected_src); + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); if (!ASSERT_EQ(ret, 0, "dmac not match")) { char expected[18], actual[18]; |