1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Cloudflare Ltd.
// Copyright (c) 2020 Isovalent, Inc.
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
struct bpf_sock_tuple *result;
struct ethhdr *eth;
__u64 tuple_len;
__u8 proto = 0;
__u64 ihl_len;
eth = (struct ethhdr *)(data);
if (eth + 1 > data_end)
return NULL;
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
if (iph + 1 > data_end)
return NULL;
if (iph->ihl != 5)
/* Options are not supported */
return NULL;
ihl_len = iph->ihl * 4;
proto = iph->protocol;
*ipv4 = true;
result = (struct bpf_sock_tuple *)&iph->saddr;
} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
if (ip6h + 1 > data_end)
return NULL;
ihl_len = sizeof(*ip6h);
proto = ip6h->nexthdr;
*ipv4 = false;
result = (struct bpf_sock_tuple *)&ip6h->saddr;
} else {
return (struct bpf_sock_tuple *)data;
}
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
return NULL;
*tcp = (proto == IPPROTO_TCP);
return result;
}
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
size_t tuple_len;
int ret;
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
return TC_ACT_SHOT;
sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
if (sk)
goto assign;
if (ipv4) {
if (tuple->ipv4.dport != bpf_htons(4321))
return TC_ACT_OK;
ln.ipv4.daddr = bpf_htonl(0x7f000001);
ln.ipv4.dport = bpf_htons(1234);
sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
BPF_F_CURRENT_NETNS, 0);
} else {
if (tuple->ipv6.dport != bpf_htons(4321))
return TC_ACT_OK;
/* Upper parts of daddr are already zero. */
ln.ipv6.daddr[3] = bpf_htonl(0x1);
ln.ipv6.dport = bpf_htons(1234);
sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
BPF_F_CURRENT_NETNS, 0);
}
/* workaround: We can't do a single socket lookup here, because then
* the compiler will likely spill tuple_len to the stack. This makes it
* lose all bounds information in the verifier, which then rejects the
* call as unsafe.
*/
if (!sk)
return TC_ACT_SHOT;
assign:
ret = bpf_sk_assign(skb, sk, 0);
bpf_sk_release(sk);
return ret;
}
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
size_t tuple_len;
int ret;
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
return TC_ACT_SHOT;
sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
if (sk) {
if (sk->state != BPF_TCP_LISTEN)
goto assign;
bpf_sk_release(sk);
}
if (ipv4) {
if (tuple->ipv4.dport != bpf_htons(4321))
return TC_ACT_OK;
ln.ipv4.daddr = bpf_htonl(0x7f000001);
ln.ipv4.dport = bpf_htons(1234);
sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
BPF_F_CURRENT_NETNS, 0);
} else {
if (tuple->ipv6.dport != bpf_htons(4321))
return TC_ACT_OK;
/* Upper parts of daddr are already zero. */
ln.ipv6.daddr[3] = bpf_htonl(0x1);
ln.ipv6.dport = bpf_htons(1234);
sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
BPF_F_CURRENT_NETNS, 0);
}
/* workaround: We can't do a single socket lookup here, because then
* the compiler will likely spill tuple_len to the stack. This makes it
* lose all bounds information in the verifier, which then rejects the
* call as unsafe.
*/
if (!sk)
return TC_ACT_SHOT;
if (sk->state != BPF_TCP_LISTEN) {
bpf_sk_release(sk);
return TC_ACT_SHOT;
}
assign:
ret = bpf_sk_assign(skb, sk, 0);
bpf_sk_release(sk);
return ret;
}
SEC("classifier/sk_assign_test")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
struct bpf_sock_tuple *tuple, ln = {0};
bool ipv4 = false;
bool tcp = false;
int tuple_len;
int ret = 0;
tuple = get_tuple(skb, &ipv4, &tcp);
if (!tuple)
return TC_ACT_SHOT;
/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
* differs from bpf_sk_lookup_udp(), so even though the C-level type is
* the same here, if we try to share the implementations they will
* fail to verify because we're crossing pointer types.
*/
if (tcp)
ret = handle_tcp(skb, tuple, ipv4);
else
ret = handle_udp(skb, tuple, ipv4);
return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
}
|