diff options
author | David S. Miller <davem@davemloft.net> | 2014-03-31 00:45:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-31 00:45:49 -0400 |
commit | 9109e17f7c3ace48629397b44db5ce06bf168644 (patch) | |
tree | 495b67bcf755829a5409da5b7444ea9b93f60b35 /include | |
parent | 64c27237a07129758e33f5f824ba5c33b7f57417 (diff) | |
parent | 9a985cdc5ccb0d557720221d01bd70c19f04bb8c (diff) | |
download | linux-9109e17f7c3ace48629397b44db5ce06bf168644.tar.gz linux-9109e17f7c3ace48629397b44db5ce06bf168644.tar.bz2 linux-9109e17f7c3ace48629397b44db5ce06bf168644.zip |
Merge branch 'filter-next'
Daniel Borkmann says:
====================
BPF updates
We sat down and have heavily reworked the whole previous patchset
from v10 [1] to address all comments/concerns. This patchset therefore
*replaces* the internal BPF interpreter with the new layout as
discussed in [1], and migrates some exotic callers to properly use the
BPF API for a transparent upgrade. All other callers that already use
the BPF API the way it should be used need no further changes to run
the new internals. We also removed the sysctl knob entirely, and do not
expose any structure to userland, so that implementation details only
reside in kernel space. Since we are replacing the interpreter we had
to migrate seccomp in one patch along with the interpreter to not break
anything. When attaching a new filter, the flow can be described as
follows: i) test if the JIT compiler is enabled and can compile the user
BPF, ii) if so, then go for it, iii) if not, then transparently migrate
the filter into the new representation, and run it in the interpreter.
Also, we have scratched the jit flag from the len attribute and made it
the initial patch in this series, as Pablo suggested in the last
feedback, thanks. For details, please refer to the patches themselves.
We did extensive testing of BPF and seccomp on the new interpreter
itself and also on the user ABIs and could not find any issues; new
performance numbers as posted in patch 8 are also still the same.
Please find more details in the patches themselves.
For all the previous history from v1 to v10, see [1]. We have decided
to drop the v11 as we have pedantically reworked the set, but of course,
included all previous feedback.
v3 -> v4:
- Applied feedback from Dave regarding swap insns
- Rebased on net-next
v2 -> v3:
- Rebased to latest net-next (i.e. w/ rxhash->hash rename)
- Fixed patch 8/9 commit message/doc as suggested by Dave
- Rest is unchanged
v1 -> v2:
- Rebased to latest net-next
- Added static to ptp_filter as suggested by Dave
- Fixed a typo in patch 8's commit message
- Rest unchanged
Thanks !
[1] http://thread.gmane.org/gmane.linux.kernel/1665858
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/filter.h | 118 | ||||
-rw-r--r-- | include/linux/isdn_ppp.h | 5 | ||||
-rw-r--r-- | include/linux/ptp_classify.h | 14 | ||||
-rw-r--r-- | include/linux/seccomp.h | 1 | ||||
-rw-r--r-- | include/net/sock.h | 27 |
5 files changed, 98 insertions, 67 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h index e568c8ef896b..262dcbb75ffe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,28 +9,81 @@ #include <linux/workqueue.h> #include <uapi/linux/filter.h> -#ifdef CONFIG_COMPAT -/* - * A struct sock_filter is architecture independent. +/* Internally used and optimized filter representation with extended + * instruction set based on top of classic BPF. */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG 11 + +/* BPF program can access up to 512 bytes of stack space. */ +#define MAX_BPF_STACK 512 + +/* Arg1, context and stack frame pointer register positions. */ +#define ARG1_REG 1 +#define CTX_REG 6 +#define FP_REG 10 + +struct sock_filter_int { + __u8 code; /* opcode */ + __u8 a_reg:4; /* dest register */ + __u8 x_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#ifdef CONFIG_COMPAT +/* A struct sock_filter is architecture independent. 
*/ struct compat_sock_fprog { u16 len; - compat_uptr_t filter; /* struct sock_filter * */ + compat_uptr_t filter; /* struct sock_filter * */ }; #endif +struct sock_fprog_kern { + u16 len; + struct sock_filter *filter; +}; + struct sk_buff; struct sock; +struct seccomp_data; -struct sk_filter -{ +struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ + u32 jited:1, /* Is our filter JIT'ed? */ + len:31; /* Number of filter blocks */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct sock_filter *filter); + const struct sock_filter_int *filter); union { - struct sock_filter insns[0]; + struct sock_filter insns[0]; + struct sock_filter_int insnsi[0]; struct work_struct work; }; }; @@ -41,25 +94,44 @@ static inline unsigned int sk_filter_size(unsigned int proglen) offsetof(struct sk_filter, insns[proglen])); } -extern int sk_filter(struct sock *sk, struct sk_buff *skb); -extern unsigned int sk_run_filter(const struct sk_buff *skb, - const struct sock_filter *filter); -extern int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog); -extern void sk_unattached_filter_destroy(struct sk_filter *fp); -extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -extern int sk_detach_filter(struct sock *sk); -extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); -extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); -extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); +#define sk_filter_proglen(fprog) \ + (fprog->len * sizeof(fprog->filter[0])) + +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->bpf_func)(ctx, filter->insnsi) + +int sk_filter(struct sock *sk, struct sk_buff *skb); + +u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, + const struct sock_filter_int *insni); +u32 sk_run_filter_int_skb(const struct 
sk_buff *ctx, + const struct sock_filter_int *insni); + +int sk_convert_filter(struct sock_filter *prog, int len, + struct sock_filter_int *new_prog, int *new_len); + +int sk_unattached_filter_create(struct sk_filter **pfp, + struct sock_fprog *fprog); +void sk_unattached_filter_destroy(struct sk_filter *fp); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_detach_filter(struct sock *sk); + +int sk_chk_filter(struct sock_filter *filter, unsigned int flen); +int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, + unsigned int len); +void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); + +void sk_filter_charge(struct sock *sk, struct sk_filter *fp); +void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); #ifdef CONFIG_BPF_JIT #include <stdarg.h> #include <linux/linkage.h> #include <linux/printk.h> -extern void bpf_jit_compile(struct sk_filter *fp); -extern void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct sk_filter *fp); +void bpf_jit_free(struct sk_filter *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) @@ -70,7 +142,6 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } -#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else #include <linux/slab.h> static inline void bpf_jit_compile(struct sk_filter *fp) @@ -80,7 +151,6 @@ static inline void bpf_jit_free(struct sk_filter *fp) { kfree(fp); } -#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) #endif static inline int bpf_tell_extensions(void) diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index d5f62bc5f4be..8e10f57f109f 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,9 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct 
sock_filter *pass_filter; /* filter for packets to pass */ - struct sock_filter *active_filter; /* filter for pkts to reset idle */ - unsigned pass_len, active_len; + struct sk_filter *pass_filter; /* filter for packets to pass */ + struct sk_filter *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct isdn_ppp_compressor *compressor,*decompressor; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 1dc420ba213a..6d3b0a2ef9ce 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -27,11 +27,7 @@ #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/filter.h> -#ifdef __KERNEL__ #include <linux/in.h> -#else -#include <netinet/in.h> -#endif #define PTP_CLASS_NONE 0x00 /* not a PTP event message */ #define PTP_CLASS_V1 0x01 /* protocol version 1 */ @@ -84,14 +80,6 @@ #define OP_RETA (BPF_RET | BPF_A) #define OP_RETK (BPF_RET | BPF_K) -static inline int ptp_filter_init(struct sock_filter *f, int len) -{ - if (OP_LDH == f[0].code) - return sk_chk_filter(f, len); - else - return 0; -} - #define PTP_FILTER \ {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ @@ -137,4 +125,6 @@ static inline int ptp_filter_init(struct sock_filter *f, int len) {OP_RETA, 0, 0, 0 }, /* */ \ /*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, +unsigned int ptp_classify_raw(const struct sk_buff *skb); + #endif diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840e..4054b0994071 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -76,7 +76,6 @@ static inline int seccomp_mode(struct seccomp *s) #ifdef CONFIG_SECCOMP_FILTER extern void put_seccomp_filter(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); -extern u32 seccomp_bpf_load(int off); #else /* CONFIG_SECCOMP_FILTER */ static inline void put_seccomp_filter(struct task_struct *tsk) { diff --git a/include/net/sock.h b/include/net/sock.h index 
8d7c431a0660..06a5668f05c9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1621,33 +1621,6 @@ void sk_common_release(struct sock *sk); /* Initialise core socket variables */ void sock_init_data(struct socket *sock, struct sock *sk); -void sk_filter_release_rcu(struct rcu_head *rcu); - -/** - * sk_filter_release - release a socket filter - * @fp: filter to remove - * - * Remove a filter from a socket and release its resources. - */ - -static inline void sk_filter_release(struct sk_filter *fp) -{ - if (atomic_dec_and_test(&fp->refcnt)) - call_rcu(&fp->rcu, sk_filter_release_rcu); -} - -static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) -{ - atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); - sk_filter_release(fp); -} - -static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) -{ - atomic_inc(&fp->refcnt); - atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); -} - /* * Socket reference counting postulates. * |