diff options
76 files changed, 2729 insertions, 1003 deletions
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 492980ece1ab..6644842cd3ea 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -163,13 +163,13 @@ BPF_MUL 0x20 dst \*= src BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0 BPF_OR 0x40 dst \|= src BPF_AND 0x50 dst &= src -BPF_LSH 0x60 dst <<= src -BPF_RSH 0x70 dst >>= src +BPF_LSH 0x60 dst <<= (src & mask) +BPF_RSH 0x70 dst >>= (src & mask) BPF_NEG 0x80 dst = ~src BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst BPF_XOR 0xa0 dst ^= src BPF_MOV 0xb0 dst = src -BPF_ARSH 0xc0 sign extending shift right +BPF_ARSH 0xc0 sign extending dst >>= (src & mask) BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below) ======== ===== ========================================================== @@ -204,6 +204,9 @@ for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result interpreted as an unsigned 64-bit value. There are no instructions for signed division or modulo. +Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31) +for 32-bit operations. + Byte swap instructions ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 36e4b2d8cca2..f58895830ada 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2077,8 +2077,8 @@ struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); -int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname, int flags); +int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); +int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ diff --git a/include/linux/btf.h b/include/linux/btf.h index 508199e38415..cac9f304e27a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -98,10 +98,14 @@ struct btf_type; union bpf_attr; struct btf_show; struct btf_id_set; +struct bpf_prog; + +typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id); struct btf_kfunc_id_set { struct module *owner; struct btf_id_set8 *set; + btf_kfunc_filter_t filter; }; struct btf_id_dtor_kfunc { @@ -479,7 +483,6 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } -struct bpf_prog; struct bpf_verifier_log; #ifdef CONFIG_BPF_SYSCALL @@ -487,10 +490,10 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -u32 *btf_kfunc_id_set_contains(const struct btf *btf, - enum bpf_prog_type prog_type, - u32 kfunc_btf_id); -u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); +u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, + const struct bpf_prog *prog); +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, + const struct bpf_prog *prog); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); @@ -517,8 +520,9 @@ static inline const char *btf_name_by_offset(const struct btf *btf, return NULL; } static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, - enum bpf_prog_type prog_type, - u32 kfunc_btf_id) + u32 kfunc_btf_id, + struct bpf_prog *prog) + { return NULL; } diff --git a/include/net/udp.h b/include/net/udp.h index de4b528522bb..5cad44318d71 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -437,7 +437,6 @@ struct udp_seq_afinfo { struct udp_iter_state { struct seq_net_private p; int bucket; - struct udp_seq_afinfo *bpf_seq_afinfo; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bb11a6ee667..9273c654743c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1272,6 +1272,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1423,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6b682b8e4b50..947f0b83bfad 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -222,10 +222,17 @@ enum btf_kfunc_hook { enum { BTF_KFUNC_SET_MAX_CNT = 256, BTF_DTOR_KFUNC_MAX_CNT = 256, + BTF_KFUNC_FILTER_MAX_CNT = 16, +}; + +struct btf_kfunc_hook_filter { + btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT]; + u32 nr_filters; }; struct btf_kfunc_set_tab { struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; + struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX]; }; struct btf_id_dtor_kfunc_tab { @@ -7669,9 +7676,12 @@ static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags) /* Kernel Function (kfunc) BTF ID set registration API */ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - struct btf_id_set8 *add_set) + const struct btf_kfunc_id_set *kset) { + struct btf_kfunc_hook_filter *hook_filter; + struct btf_id_set8 *add_set = kset->set; bool vmlinux_set = !btf_is_module(btf); + bool add_filter = !!kset->filter; struct btf_kfunc_set_tab *tab; struct btf_id_set8 *set; u32 set_cnt; @@ -7686,6 +7696,24 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, return 0; tab = btf->kfunc_set_tab; + + if (tab && add_filter) { + u32 i; + + hook_filter = &tab->hook_filters[hook]; + for (i = 0; i < hook_filter->nr_filters; i++) { + if (hook_filter->filters[i] == kset->filter) { + add_filter = false; + break; + } + } + + if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) { + ret = -E2BIG; + goto end; + } + } + if (!tab) { tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); if (!tab) @@ -7708,7 +7736,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, */ if (!vmlinux_set) { tab->sets[hook] = add_set; - return 0; + goto do_add_filter; } /* In case of vmlinux sets, there may be more than one set being @@ -7750,6 +7778,11 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); +do_add_filter: + if (add_filter) { + hook_filter = &tab->hook_filters[hook]; + hook_filter->filters[hook_filter->nr_filters++] = kset->filter; + } return 0; end: btf_free_kfunc_set_tab(btf); @@ -7758,15 +7791,22 @@ end: static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_hook hook, - u32 kfunc_btf_id) + u32 kfunc_btf_id, + const struct bpf_prog *prog) { + struct btf_kfunc_hook_filter *hook_filter; struct btf_id_set8 *set; - u32 *id; + u32 *id, i; if (hook >= BTF_KFUNC_HOOK_MAX) return NULL; if (!btf->kfunc_set_tab) return NULL; + hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; + for (i = 0; i < hook_filter->nr_filters; i++) { + if (hook_filter->filters[i](prog, kfunc_btf_id)) + return NULL; + } set = btf->kfunc_set_tab->sets[hook]; if (!set) return NULL; @@ -7821,23 +7861,25 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) * protection for looking up a well-formed btf->kfunc_set_tab. */ u32 *btf_kfunc_id_set_contains(const struct btf *btf, - enum bpf_prog_type prog_type, - u32 kfunc_btf_id) + u32 kfunc_btf_id, + const struct bpf_prog *prog) { + enum bpf_prog_type prog_type = resolve_prog_type(prog); enum btf_kfunc_hook hook; u32 *kfunc_flags; - kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id); + kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); if (kfunc_flags) return kfunc_flags; hook = bpf_prog_type_to_kfunc_hook(prog_type); - return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog); } -u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id) +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, + const struct bpf_prog *prog) { - return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id); + return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog); } static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, @@ -7868,7 +7910,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, goto err_out; } - ret = btf_populate_kfunc_set(btf, hook, kset->set); + ret = btf_populate_kfunc_set(btf, hook, kset); + err_out: btf_put(btf); return ret; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 9948b542a470..4174f76133df 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -435,7 +435,7 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent, return ret; } -static int bpf_obj_do_pin(const char __user *pathname, void *raw, +static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, enum bpf_type type) { struct dentry *dentry; @@ -444,22 +444,21 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw, umode_t mode; int ret; - dentry = user_path_create(AT_FDCWD, pathname, &path, 0); + dentry = user_path_create(path_fd, pathname, &path, 0); if (IS_ERR(dentry)) return PTR_ERR(dentry); - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - - ret = security_path_mknod(&path, dentry, mode, 0); - if (ret) - goto out; - dir = d_inode(path.dentry); if (dir->i_op != &bpf_dir_iops) { ret = -EPERM; goto out; } + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + ret = security_path_mknod(&path, dentry, mode, 0); + if (ret) + goto out; + switch (type) { case BPF_TYPE_PROG: ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); @@ -478,7 +477,7 @@ out: return ret; } -int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) { enum bpf_type type; void *raw; @@ -488,14 +487,14 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) if (IS_ERR(raw)) return PTR_ERR(raw); - ret = bpf_obj_do_pin(pathname, raw, type); + ret = bpf_obj_do_pin(path_fd, pathname, raw, type); if (ret != 0) bpf_any_put(raw, type); return ret; } -static void *bpf_obj_do_get(const char __user *pathname, +static void *bpf_obj_do_get(int path_fd, const char __user *pathname, enum bpf_type *type, int flags) { struct inode *inode; @@ -503,7 +502,7 @@ static void *bpf_obj_do_get(const char __user *pathname, void *raw; int ret; - ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); + ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); @@ -527,7 +526,7 @@ out: return ERR_PTR(ret); } -int bpf_obj_get_user(const char __user *pathname, int flags) +int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; int f_flags; @@ -538,7 +537,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) if (f_flags < 0) return f_flags; - raw = bpf_obj_do_get(pathname, &type, f_flags); + raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); if (IS_ERR(raw)) return PTR_ERR(raw); diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 046ddff37a76..850494423530 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -62,9 +62,6 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); - WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, - "verifier log line truncated - local buffer too short\n"); - if (log->level == BPF_LOG_KERNEL) { bool newline = n > 0 && log->kbuf[n - 1] == '\n'; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 909c112ef537..92a57efc77de 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1931,6 +1931,11 @@ static int map_freeze(const union bpf_attr *attr) return -ENOTSUPP; } + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + fdput(f); + return -EPERM; + } + mutex_lock(&map->freeze_mutex); if (bpf_map_write_active(map)) { err = -EBUSY; @@ -1940,10 +1945,6 @@ static int map_freeze(const union bpf_attr *attr) err = -EBUSY; goto err_put; } - if (!bpf_capable()) { - err = -EPERM; - goto err_put; - } WRITE_ONCE(map->frozen, true); err_put: @@ -2697,23 +2698,38 @@ free_prog: return err; } -#define BPF_OBJ_LAST_FIELD file_flags +#define BPF_OBJ_LAST_FIELD path_fd static int bpf_obj_pin(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) + int path_fd; + + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_pin_user(attr->bpf_fd, path_fd, + u64_to_user_ptr(attr->pathname)); } static int bpf_obj_get(const union bpf_attr *attr) { + int path_fd; + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || - attr->file_flags & ~BPF_OBJ_FLAG_MASK) + attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), attr->file_flags); } @@ -2968,10 +2984,17 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, { struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link); + u32 target_btf_id, target_obj_id; + bpf_trampoline_unpack_key(tr_link->trampoline->key, + &target_obj_id, &target_btf_id); seq_printf(seq, - "attach_type:\t%d\n", - tr_link->attach_type); + "attach_type:\t%d\n" + "target_obj_id:\t%u\n" + "target_btf_id:\t%u\n", + tr_link->attach_type, + target_obj_id, + target_btf_id); } static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ae2ee19f6894..086b2a14905b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10939,7 +10939,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, *kfunc_name = func_name; func_proto = btf_type_by_id(desc_btf, func->type); - kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog); if (!kfunc_flags) { return -EACCES; } @@ -19010,7 +19010,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, * in the fmodret id set with the KF_SLEEPABLE flag. */ else { - u32 *flags = btf_kfunc_is_modify_return(btf, btf_id); + u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, + prog); if (flags && (*flags & KF_SLEEPABLE)) ret = 0; @@ -19038,7 +19039,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return -EINVAL; } ret = -EINVAL; - if (btf_kfunc_is_modify_return(btf, btf_id) || + if (btf_kfunc_is_modify_return(btf, btf_id, prog) || !check_attach_modify_return(addr, tname)) ret = 0; if (ret) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 98143b86a9dd..2321bd2f9964 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -561,29 +561,6 @@ __bpf_kfunc int bpf_modify_return_test(int a, int *b) return a + *b; } -__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) -{ - return a + b + c + d; -} - -__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) -{ - return a + b; -} - -__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) -{ - return sk; -} - -long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) -{ - /* Provoke the compiler to assume that the caller has sign-extended a, - * b and c on platforms where this is required (e.g. s390x). - */ - return (long)a + (long)b + (long)c + d; -} - int noinline bpf_fentry_shadow_test(int a) { return a + 1; @@ -606,32 +583,6 @@ struct prog_test_ref_kfunc { refcount_t cnt; }; -static struct prog_test_ref_kfunc prog_test_struct = { - .a = 42, - .b = 108, - .next = &prog_test_struct, - .cnt = REFCOUNT_INIT(1), -}; - -__bpf_kfunc struct prog_test_ref_kfunc * -bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) -{ - refcount_inc(&prog_test_struct.cnt); - return &prog_test_struct; -} - -__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) -{ - WARN_ON_ONCE(1); -} - -__bpf_kfunc struct prog_test_member * -bpf_kfunc_call_memb_acquire(void) -{ - WARN_ON_ONCE(1); - return NULL; -} - __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { refcount_dec(&p->cnt); @@ -641,134 +592,6 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) -{ - WARN_ON_ONCE(1); -} - -static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) -{ - if (size > 2 * sizeof(int)) - return NULL; - - return (int *)p; -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, - const int rdwr_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); -} - -__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -/* the next 2 ones can't be really used for testing expect to ensure - * that the verifier rejects the call. - * Acquire functions must return struct pointers, so these ones are - * failing. - */ -__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, - const int rdonly_buf_size) -{ - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); -} - -__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) -{ -} - -struct prog_test_pass1 { - int x0; - struct { - int x1; - struct { - int x2; - struct { - int x3; - }; - }; - }; -}; - -struct prog_test_pass2 { - int len; - short arr1[4]; - struct { - char arr2[4]; - unsigned long arr3[8]; - } x; -}; - -struct prog_test_fail1 { - void *p; - int x; -}; - -struct prog_test_fail2 { - int x8; - struct prog_test_pass1 x; -}; - -struct prog_test_fail3 { - int len; - char arr1[2]; - char arr2[]; -}; - -__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) -{ -} - -__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) -{ - /* p != NULL, but p->cnt could be 0 */ -} - -__bpf_kfunc void bpf_kfunc_call_test_destructive(void) -{ -} - -__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) -{ - return arg; -} - __diag_pop(); BTF_SET8_START(bpf_test_modify_return_ids) @@ -782,32 +605,8 @@ static const struct btf_kfunc_id_set bpf_test_modify_return_set = { }; BTF_SET8_START(test_sk_check_kfunc_ids) -BTF_ID_FLAGS(func, bpf_kfunc_call_test1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test4) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) -BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, diff --git a/net/core/filter.c b/net/core/filter.c index 451b0ec7f242..968139f4a1ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11723,3 +11723,66 @@ static int __init bpf_kfunc_init(void) return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); } late_initcall(bpf_kfunc_init); + +/* Disables missing prototype warnings */ +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. + * + * The function expects a non-NULL pointer to a socket, and invokes the + * protocol specific socket destroy handlers. + * + * The helper can only be called from BPF contexts that have acquired the socket + * locks. + * + * Parameters: + * @sock: Pointer to socket to be destroyed + * + * Return: + * On error, may return EPROTONOSUPPORT, EINVAL. + * EPROTONOSUPPORT if protocol specific destroy handler is not supported. + * 0 otherwise + */ +__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) +{ + struct sock *sk = (struct sock *)sock; + + /* The locking semantics that allow for synchronous execution of the + * destroy handlers are only supported for TCP and UDP. + * Supporting protocols will need to acquire sock lock in the BPF context + * prior to invoking this kfunc. + */ + if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP && + sk->sk_protocol != IPPROTO_UDP)) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, ECONNABORTED); +} + +__diag_pop() + +BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) +BTF_SET8_END(bpf_sk_iter_kfunc_ids) + +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) && + prog->expected_attach_type != BPF_TRACE_ITER) + return -EACCES; + return 0; +} + +static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_sk_iter_kfunc_ids, + .filter = tracing_iter_filter, +}; + +static int init_subsystem(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set); +} +late_initcall(init_subsystem); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1b4ec67cbd7..dc8e4697d234 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4553,8 +4553,10 @@ int tcp_abort(struct sock *sk, int err) return 0; } - /* Don't race with userspace socket closes such as tcp_close. */ - lock_sock(sk); + /* BPF context ensures sock locking. */ + if (!has_current_bpf_ctx()) + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); @@ -4578,7 +4580,8 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); tcp_write_queue_purge(sk); - release_sock(sk); + if (!has_current_bpf_ctx()) + release_sock(sk); return 0; } EXPORT_SYMBOL_GPL(tcp_abort); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a50bd782f91f..53e9ce2f05bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2967,7 +2967,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; - bool slow; uid_t uid; int ret; @@ -2975,7 +2974,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) return 0; if (sk_fullsock(sk)) - slow = lock_sock_fast(sk); + lock_sock(sk); if (unlikely(sk_unhashed(sk))) { ret = SEQ_SKIP; @@ -2999,7 +2998,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) unlock: if (sk_fullsock(sk)) - unlock_sock_fast(sk, slow); + release_sock(sk); return ret; } @@ -3361,7 +3360,7 @@ static struct bpf_iter_reg tcp_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .get_func_proto = bpf_iter_tcp_get_func_proto, .seq_info = &tcp_seq_info, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 514ebd4aff74..fd3dae081f3a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2883,7 +2883,8 @@ EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { - lock_sock(sk); + if (!has_current_bpf_ctx()) + lock_sock(sk); /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing * with close() @@ -2896,7 +2897,8 @@ int udp_abort(struct sock *sk, int err) __udp_disconnect(sk, 0); out: - release_sock(sk); + if (!has_current_bpf_ctx()) + release_sock(sk); return 0; } @@ -2941,9 +2943,30 @@ EXPORT_SYMBOL(udp_prot); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo, - struct net *net) +static unsigned short seq_file_family(const struct seq_file *seq); +static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) +{ + unsigned short family = seq_file_family(seq); + + /* AF_UNSPEC is used as a match all */ + return ((family == AF_UNSPEC || family == sk->sk_family) && + net_eq(sock_net(sk), seq_file_net(seq))); +} + +#ifdef CONFIG_BPF_SYSCALL +static const struct seq_operations bpf_iter_udp_seq_ops; +#endif +static struct udp_table *udp_get_table_seq(struct seq_file *seq, + struct net *net) { + const struct udp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + if (seq->op == &bpf_iter_udp_seq_ops) + return net->ipv4.udp_table; +#endif + + afinfo = pde_data(file_inode(seq->file)); return afinfo->udp_table ? : net->ipv4.udp_table; } @@ -2951,16 +2974,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; struct sock *sk; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - - udptable = udp_get_table_afinfo(afinfo, net); + udptable = udp_get_table_seq(seq, net); for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { @@ -2971,10 +2988,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) spin_lock_bh(&hslot->lock); sk_for_each(sk, &hslot->head) { - if (!net_eq(sock_net(sk), net)) - continue; - if (afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) + if (seq_sk_match(seq, sk)) goto found; } spin_unlock_bh(&hslot->lock); @@ -2988,22 +3002,14 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - do { sk = sk_next(sk); - } while (sk && (!net_eq(sock_net(sk), net) || - (afinfo->family != AF_UNSPEC && - sk->sk_family != afinfo->family))); + } while (sk && !seq_sk_match(seq, sk)); if (!sk) { - udptable = udp_get_table_afinfo(afinfo, net); + udptable = udp_get_table_seq(seq, net); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3049,15 +3055,9 @@ EXPORT_SYMBOL(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; - struct udp_seq_afinfo *afinfo; struct udp_table *udptable; - if (state->bpf_seq_afinfo) - afinfo = state->bpf_seq_afinfo; - else - afinfo = pde_data(file_inode(seq->file)); - - udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq)); + udptable = udp_get_table_seq(seq, seq_file_net(seq)); if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3110,6 +3110,143 @@ struct bpf_iter__udp { int bucket __aligned(8); }; +struct bpf_udp_iter_state { + struct udp_iter_state state; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + int offset; + struct sock **batch; + bool st_bucket_done; +}; + +static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, + unsigned int new_batch_sz); +static struct sock *bpf_iter_udp_batch(struct seq_file *seq) +{ + struct bpf_udp_iter_state *iter = seq->private; + struct udp_iter_state *state = &iter->state; + struct net *net = seq_file_net(seq); + struct udp_table *udptable; + unsigned int batch_sks = 0; + bool resized = false; + struct sock *sk; + + /* The current batch is done, so advance the bucket. */ + if (iter->st_bucket_done) { + state->bucket++; + iter->offset = 0; + } + + udptable = udp_get_table_seq(seq, net); + +again: + /* New batch for the next bucket. + * Iterate over the hash table to find a bucket with sockets matching + * the iterator attributes, and return the first matching socket from + * the bucket. The remaining matched sockets from the bucket are batched + * before releasing the bucket lock. This allows BPF programs that are + * called in seq_show to acquire the bucket lock if needed. + */ + iter->cur_sk = 0; + iter->end_sk = 0; + iter->st_bucket_done = false; + batch_sks = 0; + + for (; state->bucket <= udptable->mask; state->bucket++) { + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; + + if (hlist_empty(&hslot2->head)) { + iter->offset = 0; + continue; + } + + spin_lock_bh(&hslot2->lock); + udp_portaddr_for_each_entry(sk, &hslot2->head) { + if (seq_sk_match(seq, sk)) { + /* Resume from the last iterated socket at the + * offset in the bucket before iterator was stopped. + */ + if (iter->offset) { + --iter->offset; + continue; + } + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + batch_sks++; + } + } + spin_unlock_bh(&hslot2->lock); + + if (iter->end_sk) + break; + + /* Reset the current bucket's offset before moving to the next bucket. */ + iter->offset = 0; + } + + /* All done: no batch made. */ + if (!iter->end_sk) + return NULL; + + if (iter->end_sk == batch_sks) { + /* Batching is done for the current bucket; return the first + * socket to be iterated from the batch. + */ + iter->st_bucket_done = true; + goto done; + } + if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { + resized = true; + /* After allocating a larger batch, retry one more time to grab + * the whole bucket. + */ + state->bucket--; + goto again; + } +done: + return iter->batch[0]; +} + +static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_udp_iter_state *iter = seq->private; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so unref the iter->cur_sk. + */ + if (iter->cur_sk < iter->end_sk) { + sock_put(iter->batch[iter->cur_sk++]); + ++iter->offset; + } + + /* After updating iter->cur_sk, check if there are more sockets + * available in the current bucket batch. + */ + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + /* Prepare a new batch. */ + sk = bpf_iter_udp_batch(seq); + + ++*pos; + return sk; +} + +static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos) +{ + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + if (*pos) + return bpf_iter_udp_batch(seq); + + return SEQ_START_TOKEN; +} + static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) { @@ -3130,18 +3267,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) struct bpf_prog *prog; struct sock *sk = v; uid_t uid; + int ret; if (v == SEQ_START_TOKEN) return 0; + lock_sock(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); + ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); + +unlock: + release_sock(sk); + return ret; +} + +static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); } static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) { + struct bpf_udp_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -3152,17 +3308,35 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) (void)udp_prog_seq_show(prog, &meta, v, 0, 0); } - udp_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) { + bpf_iter_udp_put_batch(iter); + iter->st_bucket_done = false; + } } static const struct seq_operations bpf_iter_udp_seq_ops = { - .start = udp_seq_start, - .next = udp_seq_next, + .start = bpf_iter_udp_seq_start, + .next = bpf_iter_udp_seq_next, .stop = bpf_iter_udp_seq_stop, .show = bpf_iter_udp_seq_show, }; #endif +static unsigned short seq_file_family(const struct seq_file *seq) +{ + const struct udp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + /* BPF iterator: bpf programs to filter sockets. */ + if (seq->op == &bpf_iter_udp_seq_ops) + return AF_UNSPEC; +#endif + + /* Proc fs iterator */ + afinfo = pde_data(file_inode(seq->file)); + return afinfo->family; +} + const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, @@ -3371,38 +3545,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) +static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, + unsigned int new_batch_sz) { - struct udp_iter_state *st = priv_data; - struct udp_seq_afinfo *afinfo; - int ret; + struct sock **new_batch; - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); - if (!afinfo) + new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), + GFP_USER | __GFP_NOWARN); + if (!new_batch) return -ENOMEM; - afinfo->family = AF_UNSPEC; - afinfo->udp_table = NULL; - st->bpf_seq_afinfo = afinfo; + bpf_iter_udp_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +#define INIT_BATCH_SZ 16 + +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct bpf_udp_iter_state *iter = priv_data; + int ret; + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) - kfree(afinfo); + return ret; + + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); + if (ret) + bpf_iter_fini_seq_net(priv_data); + return ret; } static void bpf_iter_fini_udp(void *priv_data) { - struct udp_iter_state *st = priv_data; + struct bpf_udp_iter_state *iter = priv_data; - kfree(st->bpf_seq_afinfo); bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); } static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, - .seq_priv_size = sizeof(struct udp_iter_state), + .seq_priv_size = sizeof(struct bpf_udp_iter_state), }; static struct bpf_iter_reg udp_reg_info = { @@ -3410,7 +3601,7 @@ static struct bpf_iter_reg udp_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &udp_seq_info, }; diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 11250c4734fe..3b7ba037af95 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -28,7 +28,7 @@ MAP COMMANDS | **bpftool** **map** { **show** | **list** } [*MAP*] | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ | **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ -| [**dev** *NAME*] +| [**offload_dev** *NAME*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* [**key** *DATA*] @@ -73,7 +73,7 @@ DESCRIPTION maps. On such kernels bpftool will automatically emit this information as well. - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -86,8 +86,8 @@ DESCRIPTION kernel needs it to collect metadata related to the inner maps that the new map will work with. - Keyword **dev** expects a network interface name, and is used - to request hardware offload for the map. + Keyword **offload_dev** expects a network interface name, + and is used to request hardware offload for the map. **bpftool map dump** *MAP* Dump all entries in a given *MAP*. In case of **name**, diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 9443c524bb76..dcae81bd27ed 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] | **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -129,7 +129,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -143,8 +143,11 @@ DESCRIPTION to be replaced in the ELF file counting from 0, while *NAME* allows to replace a map by name. *MAP* specifies the map to use, referring to it by **id** or through a **pinned** file. - If **dev** *NAME* is specified program will be loaded onto - given networking device (offload). + If **offload_dev** *NAME* is specified program will be loaded + onto given networking device (offload). + If **xdpmeta_dev** *NAME* is specified program will become + device-bound without offloading, this facilitates access + to XDP metadata. Optional **pinmaps** argument can be provided to pin all maps under *MAP_DIR* directory. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index e7234d1a5306..085bf18f3659 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -278,7 +278,7 @@ _bpftool() _bpftool_get_prog_tags return 0 ;; - dev) + dev|offload_dev|xdpmeta_dev) _sysfs_get_netdevs return 0 ;; @@ -508,7 +508,8 @@ _bpftool() ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) - _bpftool_once_attr 'type dev pinmaps autoattach' + _bpftool_once_attr 'type pinmaps autoattach' + _bpftool_one_of_list 'offload_dev xdpmeta_dev' return 0 ;; esac @@ -733,7 +734,7 @@ _bpftool() esac ;; *) - _bpftool_once_attr 'type key value entries name flags dev' + _bpftool_once_attr 'type key value entries name flags offload_dev' if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then _bpftool_once_attr 'inner_map' fi diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1360c82ae732..cc6e6aae2447 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -68,7 +68,7 @@ void p_info(const char *fmt, ...) va_end(ap); } -static bool is_bpffs(char *path) +static bool is_bpffs(const char *path) { struct statfs st_fs; @@ -244,13 +244,16 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) return fd; } -int mount_bpffs_for_pin(const char *name) +int mount_bpffs_for_pin(const char *name, bool is_dir) { char err_str[ERR_MAX_LEN]; char *file; char *dir; int err = 0; + if (is_dir && is_bpffs(name)) + return err; + file = malloc(strlen(name) + 1); if (!file) { p_err("mem alloc failed"); @@ -286,7 +289,7 @@ int do_pin_fd(int fd, const char *name) { int err; - err = mount_bpffs_for_pin(name); + err = mount_bpffs_for_pin(name, false); if (err) return err; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 9a1d2365a297..6b0e5202ca7a 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -76,7 +76,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - err = mount_bpffs_for_pin(path); + err = mount_bpffs_for_pin(path, false); if (err) goto close_link; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 243b74e18e51..2d786072ed0d 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -195,6 +195,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) show_link_attach_type_json(info->tracing.attach_type, json_wtr); + jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id); + jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); break; case BPF_LINK_TYPE_CGROUP: jsonw_lluint_field(json_wtr, "cgroup_id", @@ -375,6 +377,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tprog_type %u ", prog_info.type); show_link_attach_type_plain(info->tracing.attach_type); + if (info->tracing.target_obj_id || info->tracing.target_btf_id) + printf("\n\ttarget_obj_id %u target_btf_id %u ", + info->tracing.target_obj_id, + info->tracing.target_btf_id); break; case BPF_LINK_TYPE_CGROUP: printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index a49534d7eafa..b8bb08d10dec 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -142,7 +142,7 @@ const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); int open_obj_pinned(const char *path, bool quiet); int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); -int mount_bpffs_for_pin(const char *name); +int mount_bpffs_for_pin(const char *name, bool is_dir); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index ae9e822aa3fe..f98f7bbea2b1 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1287,6 +1287,11 @@ static int do_create(int argc, char **argv) "flags")) goto exit; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: NEXT_ARG(); if (attr.map_ifindex) { @@ -1431,7 +1436,7 @@ static int do_help(int argc, char **argv) "Usage: %1$s %2$s { show | list } [MAP]\n" " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" - " [inner_map MAP] [dev NAME]\n" + " [inner_map MAP] [offload_dev NAME]\n" " %1$s %2$s dump MAP\n" " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" " %1$s %2$s lookup MAP [key DATA]\n" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 91b6075b2db3..8443a149dd17 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1517,12 +1517,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + __u32 xdpmeta_ifindex = 0; + __u32 offload_ifindex = 0; bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; unsigned int i, j; - __u32 ifindex = 0; const char *file; int idx, err; @@ -1614,17 +1615,46 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) map_replace[old_map_fds].fd = fd; old_map_fds++; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to offload program to device.\n" + "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: NEXT_ARG(); - if (ifindex) { - p_err("offload device already specified"); + if (offload_ifindex) { + p_err("offload_dev already specified"); + goto err_free_reuse_maps; + } else if (xdpmeta_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); + goto err_free_reuse_maps; + } + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + offload_ifindex = if_nametoindex(*argv); + if (!offload_ifindex) { + p_err("unrecognized netdevice '%s': %s", + *argv, strerror(errno)); + goto err_free_reuse_maps; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "xdpmeta_dev")) { + NEXT_ARG(); + + if (xdpmeta_ifindex) { + p_err("xdpmeta_dev already specified"); + goto err_free_reuse_maps; + } else if (offload_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); goto err_free_reuse_maps; } if (!REQ_ARGS(1)) goto err_free_reuse_maps; - ifindex = if_nametoindex(*argv); - if (!ifindex) { + xdpmeta_ifindex = if_nametoindex(*argv); + if (!xdpmeta_ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); goto err_free_reuse_maps; @@ -1671,7 +1701,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - bpf_program__set_ifindex(pos, ifindex); + if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) { + bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY); + bpf_program__set_ifindex(pos, xdpmeta_ifindex); + } else { + bpf_program__set_ifindex(pos, offload_ifindex); + } if (bpf_program__type(pos) != prog_type) bpf_program__set_type(pos, prog_type); bpf_program__set_expected_attach_type(pos, expected_attach_type); @@ -1709,7 +1744,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) idx = 0; bpf_object__for_each_map(map, obj) { if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) - bpf_map__set_ifindex(map, ifindex); + bpf_map__set_ifindex(map, offload_ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { err = bpf_map__reuse_fd(map, map_replace[j++].fd); @@ -1739,7 +1774,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = mount_bpffs_for_pin(pinfile); + err = mount_bpffs_for_pin(pinfile, !first_prog_only); if (err) goto err_close_obj; @@ -2416,7 +2451,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" " %1$s %2$s pin PROG FILE\n" " %1$s %2$s { load | loadall } OBJ PATH \\\n" - " [type TYPE] [dev NAME] \\\n" + " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" " [autoattach]\n" diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 57c3da70aa31..3ebc9fe91e0e 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -509,7 +509,7 @@ static int do_register(int argc, char **argv) if (argc == 1) linkdir = GET_ARG(); - if (linkdir && mount_bpffs_for_pin(linkdir)) { + if (linkdir && mount_bpffs_for_pin(linkdir, true)) { p_err("can't mount bpffs for pinning"); return -1; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1bb11a6ee667..9273c654743c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1272,6 +1272,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1423,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 128ac723c4ea..ed86b37d8024 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -572,20 +572,30 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co (void *)keys, (void *)values, count, opts); } -int bpf_obj_pin(int fd, const char *pathname) +int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int ret; + if (!OPTS_VALID(opts, bpf_obj_pin_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); attr.bpf_fd = fd; ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); return libbpf_err_errno(ret); } +int bpf_obj_pin(int fd, const char *pathname) +{ + return bpf_obj_pin_opts(fd, pathname, NULL); +} + int bpf_obj_get(const char *pathname) { return bpf_obj_get_opts(pathname, NULL); @@ -593,7 +603,7 @@ int bpf_obj_get(const char *pathname) int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int fd; @@ -601,6 +611,7 @@ int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) return libbpf_err(-EINVAL); memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); attr.file_flags = OPTS_GET(opts, file_flags, 0); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a2c091389b18..9aa0ee473754 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -284,16 +284,30 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); -struct bpf_obj_get_opts { +struct bpf_obj_pin_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 file_flags; + int path_fd; size_t :0; }; -#define bpf_obj_get_opts__last_field file_flags +#define bpf_obj_pin_opts__last_field path_fd LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, + const struct bpf_obj_pin_opts *opts); + +struct bpf_obj_get_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + int path_fd; + + size_t :0; +}; +#define bpf_obj_get_opts__last_field path_fd + LIBBPF_API int bpf_obj_get(const char *pathname); LIBBPF_API int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0a2c079244b6..8484b563b53d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1064,7 +1064,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) int err = 0; long sz; - f = fopen(path, "rb"); + f = fopen(path, "rbe"); if (!f) { err = -errno; goto err_out; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 83e8e3bfd8ff..cf3323fd47b8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -703,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo /* obtain fd in BPF_REG_9 */ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); - /* jump to fd_array store if fd denotes module BTF */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); - /* set the default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip BTF fd store for vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* store BTF fd in slot */ + /* store BTF fd in slot, 0 for vmlinux */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* jump to insn[insn_idx].off store if fd denotes module BTF */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); + /* set the default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip BTF fd store for vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); /* store index into insn[insn_idx].off */ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); log: diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ad1ec893b41b..47632606b06d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1500,16 +1500,36 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return map; } -static size_t bpf_map_mmap_sz(const struct bpf_map *map) +static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { - long page_sz = sysconf(_SC_PAGE_SIZE); + const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = (size_t)roundup(value_sz, 8) * max_entries; map_sz = roundup(map_sz, page_sz); return map_sz; } +static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) +{ + void *mmaped; + + if (!map->mmaped) + return -EINVAL; + + if (old_sz == new_sz) + return 0; + + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (mmaped == MAP_FAILED) + return -errno; + + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); + munmap(map->mmaped, old_sz); + map->mmaped = mmaped; + return 0; +} + static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; @@ -1608,6 +1628,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, { struct bpf_map_def *def; struct bpf_map *map; + size_t mmap_sz; int err; map = bpf_object__add_map(obj); @@ -1642,7 +1663,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { err = -errno; @@ -4329,7 +4351,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); memset(info, 0, sizeof(*info)); - fp = fopen(file, "r"); + fp = fopen(file, "re"); if (!fp) { err = -errno; pr_warn("failed to open %s: %d. No procfs support?\n", file, @@ -4392,18 +4414,17 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (!new_name) return libbpf_err(-errno); - new_fd = open("/", O_RDONLY | O_CLOEXEC); + /* + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. + * This is similar to what we do in ensure_good_fd(), but without + * closing original FD. + */ + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); if (new_fd < 0) { err = -errno; goto err_free_new_name; } - new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_close_new_fd; - } - err = zclose(map->fd); if (err) { err = -errno; @@ -7433,7 +7454,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) int ret, err = 0; FILE *f; - f = fopen("/proc/kallsyms", "r"); + f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; pr_warn("failed to open /proc/kallsyms: %d\n", err); @@ -8294,7 +8315,10 @@ static void bpf_map__destroy(struct bpf_map *map) map->init_slots_sz = 0; if (map->mmaped) { - munmap(map->mmaped, bpf_map_mmap_sz(map)); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + munmap(map->mmaped, mmap_sz); map->mmaped = NULL; } @@ -9412,10 +9436,103 @@ __u32 bpf_map__value_size(const struct bpf_map *map) return map->def.value_size; } +static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) +{ + struct btf *btf; + struct btf_type *datasec_type, *var_type; + struct btf_var_secinfo *var; + const struct btf_type *array_type; + const struct btf_array *array; + int vlen, element_sz, new_array_id; + __u32 nr_elements; + + /* check btf existence */ + btf = bpf_object__btf(map->obj); + if (!btf) + return -ENOENT; + + /* verify map is datasec */ + datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); + if (!btf_is_datasec(datasec_type)) { + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify datasec has at least one var */ + vlen = btf_vlen(datasec_type); + if (vlen == 0) { + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify last var in the datasec is an array */ + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); + if (!btf_is_array(array_type)) { + pr_warn("map '%s': cannot be resized, last var must be an array\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify request size aligns with array */ + array = btf_array(array_type); + element_sz = btf__resolve_size(btf, array->type); + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", + bpf_map__name(map), element_sz, size); + return -EINVAL; + } + + /* create a new array based on the existing array, but with new length */ + nr_elements = (size - var->offset) / element_sz; + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); + if (new_array_id < 0) + return new_array_id; + + /* adding a new btf type invalidates existing pointers to btf objects, + * so refresh pointers before proceeding + */ + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + + /* finally update btf info */ + datasec_type->size = size; + var->size = size - var->offset; + var_type->type = new_array_id; + + return 0; +} + int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { if (map->fd >= 0) return libbpf_err(-EBUSY); + + if (map->mmaped) { + int err; + size_t mmap_old_sz, mmap_new_sz; + + mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); + if (err) { + pr_warn("map '%s': failed to resize memory-mapped region: %d\n", + bpf_map__name(map), err); + return err; + } + err = map_btf_datasec_resize(map, size); + if (err && err != -ENOENT) { + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", + bpf_map__name(map), err); + map->btf_value_type_id = 0; + map->btf_key_type_id = 0; + } + } + map->def.value_size = size; return 0; } @@ -9441,7 +9558,7 @@ int bpf_map__set_initial_value(struct bpf_map *map, return 0; } -const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) { if (!map->mmaped) return NULL; @@ -9957,7 +10074,7 @@ static int parse_uint_from_file(const char *file, const char *fmt) int err, ret; FILE *f; - f = fopen(file, "r"); + f = fopen(file, "re"); if (!f) { err = -errno; pr_debug("failed to open '%s': %s\n", file, @@ -12693,7 +12810,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map); + size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); int prot, map_fd = bpf_map__fd(map); void **mmaped = s->maps[i].mmaped; @@ -12720,8 +12837,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) * as per normal clean up procedure, so we don't need to worry * about it from skeleton's clean up perspective. */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, - MAP_SHARED | MAP_FIXED, map_fd, 0); + *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); if (*mmaped == MAP_FAILED) { err = -errno; *mmaped = NULL; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0b7362397ea3..754da73c643b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -869,8 +869,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); /* get/set map key size */ LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); -/* get/set map value size */ +/* get map value size */ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +/** + * @brief **bpf_map__set_value_size()** sets map value size. + * @param map the BPF map instance + * @return 0, on success; negative error, otherwise + * + * There is a special case for maps with associated memory-mapped regions, like + * the global data section maps (bss, data, rodata). When this function is used + * on such a map, the mapped region is resized. Afterward, an attempt is made to + * adjust the corresponding BTF info. This attempt is best-effort and can only + * succeed if the last variable of the data section map is an array. The array + * BTF type is replaced by a new BTF array type with a different length. + * Any previously existing pointers returned from bpf_map__initial_value() or + * corresponding data section skeleton pointer must be reinitialized. + */ LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); /* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); @@ -884,7 +898,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index a5aa3a383d69..7521a2fb7626 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,3 +391,8 @@ LIBBPF_1.2.0 { bpf_map_get_info_by_fd; bpf_prog_get_info_by_fd; } LIBBPF_1.1.0; + +LIBBPF_1.3.0 { + global: + bpf_obj_pin_opts; +} LIBBPF_1.2.0; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6065f408a59c..5f78a023b474 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -38,7 +38,7 @@ static __u32 get_ubuntu_kernel_version(void) if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0) return 0; - f = fopen(ubuntu_kver_file, "r"); + f = fopen(ubuntu_kver_file, "re"); if (!f) return 0; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 1fd2eeac5cfc..290411ddb39e 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 2 +#define LIBBPF_MINOR_VERSION 3 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 086eef355ab3..f1a141555f08 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -466,7 +466,7 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, proceed: sprintf(line, "/proc/%d/maps", pid); - f = fopen(line, "r"); + f = fopen(line, "re"); if (!f) { err = -errno; pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", @@ -954,8 +954,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct spec_map_fd = bpf_map__fd(man->specs_map); ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); - /* TODO: perform path resolution similar to uprobe's */ - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 28d2c77262be..538df8fb8c42 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -88,8 +88,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file -TEST_GEN_FILES += liburandom_read.so +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index f3c41f8902a0..642dda0e758a 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -36,8 +36,8 @@ extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, void *buffer, __u32 buffer__szk) __ksym; extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; -extern int bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; -extern int bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; +extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; +extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 52785ba671e6..cf216041876c 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -9,6 +9,7 @@ #include <linux/sysfs.h> #include <linux/tracepoint.h> #include "bpf_testmod.h" +#include "bpf_testmod_kfunc.h" #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" @@ -289,8 +290,171 @@ static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .set = &bpf_testmod_common_kfunc_ids, }; +__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +{ + return a + b + c + d; +} + +__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +{ + return a + b; +} + +__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) +{ + return sk; +} + +__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) +{ + /* Provoke the compiler to assume that the caller has sign-extended a, + * b and c on platforms where this is required (e.g. s390x). + */ + return (long)a + (long)b + (long)c + d; +} + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), +}; + +__bpf_kfunc struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + refcount_inc(&prog_test_struct.cnt); + return &prog_test_struct; +} + +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + +__bpf_kfunc struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + WARN_ON_ONCE(1); + return NULL; +} + +__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ + WARN_ON_ONCE(1); +} + +static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) +{ + if (size > 2 * sizeof(int)) + return NULL; + + return (int *)p; +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, + const int rdwr_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +/* the next 2 ones can't be really used for testing expect to ensure + * that the verifier rejects the call. + * Acquire functions must return struct pointers, so these ones are + * failing. + */ +__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ + /* p != NULL, but p->cnt could be 0 */ +} + +__bpf_kfunc void bpf_kfunc_call_test_destructive(void) +{ +} + +__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) +{ + return arg; +} + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test4) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { @@ -312,6 +476,8 @@ static int bpf_testmod_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h new file mode 100644 index 000000000000..9693c626646b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_TESTMOD_KFUNC_H +#define _BPF_TESTMOD_KFUNC_H + +#ifndef __KERNEL__ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#else +#define __ksym +struct prog_test_member1 { + int a; +}; + +struct prog_test_member { + struct prog_test_member1 m; + int c; +}; + +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_member memb; + struct prog_test_ref_kfunc *next; + refcount_t cnt; +}; +#endif + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; +void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; + +void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +void bpf_kfunc_call_int_mem_release(int *p) __ksym; + +/* The bpf_kfunc_call_test_static_unused_arg is defined as static, + * but bpf program compilation needs to see it as global symbol. + */ +#ifndef __KERNEL__ +u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#endif + +void bpf_testmod_test_mod_kfunc(int i) __ksym; + +__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; +int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; +struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; +long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; + +void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + +void bpf_kfunc_call_test_destructive(void) __ksym; + +#endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 596caa176582..a105c0cd008a 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -427,3 +427,26 @@ void close_netns(struct nstoken *token) close(token->orig_netns_fd); free(token); } + +int get_socket_local_port(int sock_fd) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + int err; + + err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); + if (err < 0) + return err; + + if (addr.ss_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&addr; + + return sin->sin_port; + } else if (addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; + + return sin->sin6_port; + } + + return -1; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index f882c691b790..694185644da6 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -56,6 +56,7 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); +int get_socket_local_port(int sock_fd); struct nstoken; /** diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index a4d0cc9d3367..fe2c502e5089 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -11,6 +11,7 @@ #include "ksym_race.skel.h" #include "bpf_mod_race.skel.h" #include "kfunc_call_race.skel.h" +#include "testing_helpers.h" /* This test crafts a race between btf_try_get_module and do_init_module, and * checks whether btf_try_get_module handles the invocation for a well-formed @@ -44,35 +45,10 @@ enum bpf_test_state { static _Atomic enum bpf_test_state state = _TS_INVALID; -static int sys_finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int sys_delete_module(const char *name, unsigned int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -static int load_module(const char *mod) -{ - int ret, fd; - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) - return fd; - - ret = sys_finit_module(fd, "", 0); - close(fd); - if (ret < 0) - return ret; - return 0; -} - static void *load_module_thread(void *p) { - if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) + if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail")) atomic_store(&state, TS_MODULE_LOAD); else atomic_store(&state, TS_MODULE_LOAD_FAIL); @@ -124,7 +100,7 @@ static void test_bpf_mod_race_config(const struct test_config *config) if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) return; - if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) + if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod")) goto end_mmap; skel = bpf_mod_race__open(); @@ -202,8 +178,8 @@ end_destroy: bpf_mod_race__destroy(skel); ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); end_module: - sys_delete_module("bpf_testmod", 0); - ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); + unload_bpf_testmod(false); + ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod"); end_mmap: munmap(fault_addr, 4096); atomic_store(&state, _TS_INVALID); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c new file mode 100644 index 000000000000..31f1e815f671 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <test_progs.h> +#include <bpf/btf.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +__attribute__((unused)) +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned int ms_flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags); +} + +static void bpf_obj_pinning_detached(void) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + int fs_fd = -1, mnt_fd = -1; + int map_fd = -1, map_fd2 = -1; + int zero = 0, src_value, dst_value, err; + const char *map_name = "fsmount_map"; + + /* A bunch of below UAPI calls are constructed based on reading: + * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html + */ + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + if (!ASSERT_GE(fs_fd, 0, "fs_fd")) + goto cleanup; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (!ASSERT_OK(err, "fs_create")) + goto cleanup; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_GE(mnt_fd, 0, "mnt_fd")) + goto cleanup; + + /* If we wanted to expose detached mount in the file system, we'd do + * something like below. But the whole point is that we actually don't + * even have to expose BPF FS in the file system to be able to work + * (pin/get objects) with it. + * + * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH); + * if (!ASSERT_OK(err, "move_mount")) + * goto cleanup; + */ + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + goto cleanup; + + /* pin BPF map into detached BPF FS through mnt_fd */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = mnt_fd; + err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts); + if (!ASSERT_OK(err, "map_pin")) + goto cleanup; + + /* get BPF map from detached BPF FS through mnt_fd */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = mnt_fd; + map_fd2 = bpf_obj_get_opts(map_name, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + src_value = 0xcafebeef; + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq1"); + ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2"); + +cleanup: + if (map_fd >= 0) + ASSERT_OK(close(map_fd), "close_map_fd"); + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + if (fs_fd >= 0) + ASSERT_OK(close(fs_fd), "close_fs_fd"); + if (mnt_fd >= 0) + ASSERT_OK(close(mnt_fd), "close_mnt_fd"); +} + +enum path_kind +{ + PATH_STR_ABS, + PATH_STR_REL, + PATH_FD_REL, +}; + +static void validate_pin(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(pin_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + /* pin BPF map using specified path definition */ + err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts); + ASSERT_OK(err, "obj_pin"); + + /* cleanup */ + if (pin_opts.path_fd >= 0) + close(pin_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + map_fd2 = bpf_obj_get(abs_path); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void validate_get(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + /* pin BPF map using specified path definition */ + err = bpf_obj_pin(map_fd, abs_path); + if (!ASSERT_OK(err, "pin_map")) + return; + + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(get_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + map_fd2 = bpf_obj_get_opts(pin_path, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* cleanup */ + if (get_opts.path_fd >= 0) + close(get_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void bpf_obj_pinning_mounted(enum path_kind path_kind) +{ + const char *map_name = "mounted_map"; + int map_fd; + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + return; + + validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind); + validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind); + ASSERT_OK(close(map_fd), "close_map_fd"); +} + +void test_bpf_obj_pinning() +{ + if (test__start_subtest("detached")) + bpf_obj_pinning_detached(); + if (test__start_subtest("mounted-str-abs")) + bpf_obj_pinning_mounted(PATH_STR_ABS); + if (test__start_subtest("mounted-str-rel")) + bpf_obj_pinning_mounted(PATH_STR_REL); + if (test__start_subtest("mounted-fd-rel")) + bpf_obj_pinning_mounted(PATH_FD_REL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c new file mode 100644 index 000000000000..fd41425d2e5c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <errno.h> +#include <sys/syscall.h> +#include <unistd.h> +#include "test_global_map_resize.skel.h" +#include "test_progs.h" + +static void run_prog_bss_array_sum(void) +{ + (void)syscall(__NR_getpid); +} + +static void run_prog_data_array_sum(void) +{ + (void)syscall(__NR_getuid); +} + +static void global_map_resize_bss_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->bss->array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.bss; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz); + if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->bss->array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = array_len; + skel->rodata->data_array_len = 1; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_bss_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_data_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->data_custom->my_array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.data_custom; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz); + if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->data_custom->my_array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = 1; + skel->rodata->data_array_len = array_len; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_data_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_invalid_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + __u32 element_sz, desired_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + return; + + /* attempt to resize a global datasec map to size + * which does NOT align with array + */ + map = skel->maps.data_custom; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf")) + goto teardown; + /* set desired size a fraction of element size beyond an aligned size */ + element_sz = sizeof(skel->data_custom->my_array[0]); + desired_sz = element_sz + element_sz / 2; + /* confirm desired size does NOT align with array */ + if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map whose only var is NOT an array */ + map = skel->maps.data_non_array; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf")) + goto teardown; + /* set desired size to arbitrary value */ + desired_sz = 1024; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map + * whose last var is NOT an array + */ + map = skel->maps.data_array_not_last; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf")) + goto teardown; + /* set desired size to a multiple of element size */ + element_sz = sizeof(skel->data_array_not_last->my_array_first[0]); + desired_sz = element_sz * 8; + /* confirm desired size aligns with array */ + if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +void test_global_map_resize(void) +{ + if (test__start_subtest("global_map_resize_bss")) + global_map_resize_bss_subtest(); + + if (test__start_subtest("global_map_resize_data")) + global_map_resize_data_subtest(); + + if (test__start_subtest("global_map_resize_invalid")) + global_map_resize_invalid_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 7fc01ff490db..f53d658ed080 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include <stdbool.h> #include "test_module_attach.skel.h" +#include "testing_helpers.h" static int duration; @@ -32,11 +33,6 @@ static int trigger_module_test_writable(int *val) return 0; } -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - void test_module_attach(void) { const int READ_SZ = 456; @@ -93,21 +89,21 @@ void test_module_attach(void) if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.handle_fexit); if (!ASSERT_OK_PTR(link, "attach_fexit")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.kprobe_multi); if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index d3915c58d0e1..c3333edd029f 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -67,12 +67,12 @@ void serial_test_netcnt(void) } /* No packets should be lost */ - ASSERT_EQ(packets, 10000, "packets"); + ASSERT_GE(packets, 10000, "packets"); /* Let's check that bytes counter matches the number of packets * multiplied by the size of ipv6 ICMP packet. */ - ASSERT_EQ(bytes, packets * 104, "bytes"); + ASSERT_GE(bytes, packets * 104, "bytes"); err: if (cg_fd != -1) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c new file mode 100644 index 000000000000..b0583309a94e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/bpf_endian.h> + +#include "sock_destroy_prog.skel.h" +#include "sock_destroy_prog_fail.skel.h" +#include "network_helpers.h" + +#define TEST_NS "sock_destroy_netns" + +static void start_iter_sockets(struct bpf_program *prog) +{ + struct bpf_link *link; + char buf[50] = {}; + int iter_fd, len; + + link = bpf_program__attach_iter(prog, NULL); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto free_link; + + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + ASSERT_GE(len, 0, "read"); + + close(iter_fd); + +free_link: + bpf_link__destroy(link); +} + +static void test_tcp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys connected client sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_tcp_server(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n, serv_port; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + serv_port = get_socket_local_port(serv); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_server); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_udp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, n = 0; + + serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys sockets. */ + start_iter_sockets(skel->progs.iter_udp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + /* UDP sockets have an overriding error code after they are disconnected, + * so we don't check for ECONNABORTED error code. + */ + +cleanup: + if (clien != -1) + close(clien); + if (serv != -1) + close(serv); +} + +static void test_udp_server(struct sock_destroy_prog *skel) +{ + int *listen_fds = NULL, n, i, serv_port; + unsigned int num_listens = 5; + char buf[1]; + + /* Start reuseport servers. */ + listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM, + "::1", 0, 0, num_listens); + if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server")) + goto cleanup; + serv_port = get_socket_local_port(listen_fds[0]); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_udp6_server); + + for (i = 0; i < num_listens; ++i) { + n = read(listen_fds[i], buf, sizeof(buf)); + if (!ASSERT_EQ(n, -1, "read") || + !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket")) + break; + } + ASSERT_EQ(i, num_listens, "server socket"); + +cleanup: + free_fds(listen_fds, num_listens); +} + +void test_sock_destroy(void) +{ + struct sock_destroy_prog *skel; + struct nstoken *nstoken = NULL; + int cgroup_fd; + + skel = sock_destroy_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + cgroup_fd = test__join_cgroup("/sock_destroy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + skel->links.sock_connect = bpf_program__attach_cgroup( + skel->progs.sock_connect, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach")) + goto cleanup; + + SYS(cleanup, "ip netns add %s", TEST_NS); + SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto cleanup; + + if (test__start_subtest("tcp_client")) + test_tcp_client(skel); + if (test__start_subtest("tcp_server")) + test_tcp_server(skel); + if (test__start_subtest("udp_client")) + test_udp_client(skel); + if (test__start_subtest("udp_server")) + test_udp_server(skel); + + RUN_TESTS(sock_destroy_prog_fail); + +cleanup: + if (nstoken) + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + if (cgroup_fd >= 0) + close(cgroup_fd); + sock_destroy_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 33dd4532e642..9e6a5e3ed4de 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -1060,7 +1060,9 @@ void test_sockopt(void) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { - test__start_subtest(tests[i].descr); + if (!test__start_subtest(tests[i].descr)) + continue; + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index d19f79048ff6..c3b45745cbcc 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -18,6 +18,7 @@ #include <linux/if_bonding.h> #include <linux/limits.h> #include <linux/udp.h> +#include <uapi/linux/netdev.h> #include "xdp_dummy.skel.h" #include "xdp_redirect_multi_kern.skel.h" @@ -492,6 +493,123 @@ out: system("ip link del bond_nest2"); } +static void test_xdp_bonding_features(struct skeletons *skeletons) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); + int bond_idx, veth1_idx, err; + struct bpf_link *link = NULL; + + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + bond_idx = if_nametoindex("bond"); + if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond")) + goto out; + + /* query default xdp-feature for bond device */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"), + "add veth{0,1} pair")) + goto out; + + if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"), + "add veth{2,3} pair")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 master bond"), + "add veth0 to master bond")) + goto out; + + /* xdp-feature for bond device should be obtained from the single slave + * device (veth0) + */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + veth1_idx = if_nametoindex("veth1"); + if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1")) + goto out; + + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, + veth1_idx); + if (!ASSERT_OK_PTR(link, "attach program to veth1")) + goto out; + + /* xdp-feature for veth0 are changed */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 master bond"), + "add veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + /* xdp-feature for bond device should be set to the most restrict + * value obtained from attached slave devices (veth0 and veth2) + */ + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 nomaster"), + "del veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 nomaster"), + "del veth0 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags"); +out: + bpf_link__destroy(link); + system("ip link del veth0"); + system("ip link del veth2"); + system("ip link del bond"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -546,6 +664,9 @@ void serial_test_xdp_bonding(void) if (test__start_subtest("xdp_bonding_nested")) test_xdp_bonding_nested(&skeletons); + if (test__start_subtest("xdp_bonding_features")) + test_xdp_bonding_features(&skeletons); + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { struct bond_test_case *test_case = &bond_test_cases[i]; diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 50f95ec61165..76d661b20e87 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr *ptr; @@ -14,9 +15,6 @@ struct { __uint(max_entries, 16); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - static __noinline int cb1(void *map, void *key, void *value, void *ctx) { void *p = *(void **)ctx; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index c2f0e18af951..7ce7e827d5f0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -3,6 +3,7 @@ #include <errno.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 0c053976f8f9..5985920d162e 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Facebook */ #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 13f00ca2ed0a..f9789e668297 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -3,13 +3,11 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("tc") int test_jit_probe_mem(struct __sk_buff *ctx) { diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c index 767472bc5a97..7632d9ecb253 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_kfunc_call_test_destructive(void) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_destructive_test(void) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c index b98313d391c6..4b0b7b79cdfb 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -2,14 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct syscall_test_args { __u8 data[16]; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c index 4e8fed75a4e0..d532af07decf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_fail(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 7daa8f5720b9..cf68d1e48a0f 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -2,22 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; -extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; -extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; -extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index c1fdecabeabf..2380c75e74ce 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -1,13 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" extern const int bpf_prog_active __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; int active_res = -1; int sk_state_res = -1; @@ -28,7 +23,7 @@ int __noinline f1(struct __sk_buff *skb) if (active) active_res = *active; - sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state; + sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 0ef286da092b..06838083079c 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -5,7 +5,8 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> -#include "bpf_experimental.h" +#include "../bpf_experimental.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct node_data { long key; @@ -32,8 +33,6 @@ struct map_value { */ struct node_data *just_here_because_btf_bug; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index d7150041e5d1..da30f0d59364 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; @@ -114,10 +115,6 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; - #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) static void test_kptr_unref(struct map_value *v) diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index da8c724f839b..450bb373b179 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -4,6 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_misc.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { char buf[8]; @@ -19,9 +20,6 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("?tc") __failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c new file mode 100644 index 000000000000..9e0bf7a54cec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "bpf_tracing_net.h" + +__be16 serv_port = 0; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} tcp_conn_sockets SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} udp_conn_sockets SEC(".maps"); + +SEC("cgroup/connect6") +int sock_connect(struct bpf_sock_addr *ctx) +{ + __u64 sock_cookie = 0; + int key = 0; + __u32 keyc = 0; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sock_cookie = bpf_get_socket_cookie(ctx); + if (ctx->protocol == IPPROTO_TCP) + bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0); + else if (ctx->protocol == IPPROTO_UDP) + bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0); + else + return 1; + + return 1; +} + +SEC("iter/tcp") +int iter_tcp6_client(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + __u64 sock_cookie = 0; + __u64 *val; + int key = 0; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + val = bpf_map_lookup_elem(&tcp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy(sk_common); + + return 0; +} + +SEC("iter/tcp") +int iter_tcp6_server(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + const struct inet_connection_sock *icsk; + const struct inet_sock *inet; + struct tcp6_sock *tcp_sk; + __be16 srcp; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + tcp_sk = bpf_skc_to_tcp6_sock(sk_common); + if (!tcp_sk) + return 0; + + icsk = &tcp_sk->tcp.inet_conn; + inet = &icsk->icsk_inet; + srcp = inet->inet_sport; + + /* Destroy server sockets. */ + if (srcp == serv_port) + bpf_sock_destroy(sk_common); + + return 0; +} + + +SEC("iter/udp") +int iter_udp6_client(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + __u64 sock_cookie = 0, *val; + int key = 0; + + if (!sk) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk); + val = bpf_map_lookup_elem(&udp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +SEC("iter/udp") +int iter_udp6_server(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + struct inet_sock *inet; + __be16 srcp; + + if (!sk) + return 0; + + inet = &udp_sk->inet; + srcp = inet->inet_sport; + if (srcp == serv_port) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c new file mode 100644 index 000000000000..dd6850b58e25 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +SEC("tp_btf/tcp_destroy_sock") +__failure __msg("calling kernel function bpf_sock_destroy is not allowed") +int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk) +{ + /* should not load */ + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c new file mode 100644 index 000000000000..2588f2384246 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +/* rodata section */ +const volatile pid_t pid; +const volatile size_t bss_array_len; +const volatile size_t data_array_len; + +/* bss section */ +int sum = 0; +int array[1]; + +/* custom data secton */ +int my_array[1] SEC(".data.custom"); + +/* custom data section which should NOT be resizable, + * since it contains a single var which is not an array + */ +int my_int SEC(".data.non_array"); + +/* custom data section which should NOT be resizable, + * since its last var is not an array + */ +int my_array_first[1] SEC(".data.array_not_last"); +int my_int_last SEC(".data.array_not_last"); + +SEC("tp/syscalls/sys_enter_getpid") +int bss_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + sum = 0; + + for (size_t i = 0; i < bss_array_len; ++i) + sum += array[i]; + + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int data_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + sum = 0; + + for (size_t i = 0; i < data_array_len; ++i) + sum += my_array[i]; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index bbad3c2d9aa5..f75e531bf36f 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -265,7 +265,10 @@ static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) { - __u16 *half = (__u16 *)&sk->dst_port; + __u16 *half; + + asm volatile (""); + half = (__u16 *)&sk->dst_port; return half[0] == bpf_htons(0xcafe); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 25ee4a22e48d..78c368e71797 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Meta */ #include <stddef.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 793689dcc170..4d582cac2c09 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,7 +11,6 @@ #include <signal.h> #include <string.h> #include <execinfo.h> /* backtrace */ -#include <linux/membarrier.h> #include <sys/sysinfo.h> /* get_nprocs */ #include <netinet/in.h> #include <sys/select.h> @@ -629,68 +628,6 @@ out: return err; } -static int finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -/* - * Trigger synchronize_rcu() in kernel. - */ -int kern_sync_rcu(void) -{ - return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); -} - -static void unload_bpf_testmod(void) -{ - if (kern_sync_rcu()) - fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); - if (delete_module("bpf_testmod", 0)) { - if (errno == ENOENT) { - if (verbose()) - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); - return; - } - fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - return; - } - if (verbose()) - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); -} - -static int load_bpf_testmod(void) -{ - int fd; - - /* ensure previous instance of the module is unloaded */ - unload_bpf_testmod(); - - if (verbose()) - fprintf(stdout, "Loading bpf_testmod.ko...\n"); - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) { - fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); - return -ENOENT; - } - if (finit_module(fd, "", 0)) { - fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); - close(fd); - return -EINVAL; - } - close(fd); - - if (verbose()) - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); - return 0; -} - /* extern declarations for test funcs */ #define DEFINE_TEST(name) \ extern void test_##name(void) __weak; \ @@ -1720,9 +1657,14 @@ int main(int argc, char **argv) env.stderr = stderr; env.has_testmod = true; - if (!env.list_test_names && load_bpf_testmod()) { - fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); - env.has_testmod = false; + if (!env.list_test_names) { + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose()); + + if (load_bpf_testmod(verbose())) { + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); + env.has_testmod = false; + } } /* initializing tests */ @@ -1819,7 +1761,7 @@ int main(int argc, char **argv) close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); + unload_bpf_testmod(verbose()); free_test_selector(&env.test_selector); free_test_selector(&env.subtest_selector); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ed3134333d4..77bd492c6024 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr) int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); -int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); int write_sysctl(const char *sysctl, const char *value); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e4657c5bc3f1..71704a38cac3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -40,6 +40,7 @@ #include "bpf_util.h" #include "test_btf.h" #include "../../../include/linux/filter.h" +#include "testing_helpers.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -873,8 +874,140 @@ static int create_map_kptr(void) return fd; } +static void set_root(bool set) +{ + __u64 caps; + + if (set) { + if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } else { + if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } +} + +static __u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t) ptr; +} + +static struct btf *btf__load_testmod_btf(struct btf *vmlinux) +{ + struct bpf_btf_info info; + __u32 len = sizeof(info); + struct btf *btf = NULL; + char name[64]; + __u32 id = 0; + int err, fd; + + /* Iterate all loaded BTF objects and find bpf_testmod, + * we need SYS_ADMIN cap for that. + */ + set_root(true); + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + perror("bpf_btf_get_next_id failed"); + break; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + perror("bpf_btf_get_fd_by_id failed"); + break; + } + + memset(&info, 0, sizeof(info)); + info.name_len = sizeof(name); + info.name = ptr_to_u64(name); + len = sizeof(info); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + close(fd); + perror("bpf_obj_get_info_by_fd failed"); + break; + } + + if (strcmp("bpf_testmod", name)) { + close(fd); + continue; + } + + btf = btf__load_from_kernel_by_id_split(id, vmlinux); + if (!btf) { + close(fd); + break; + } + + /* We need the fd to stay open so it can be used in fd_array. + * The final cleanup call to btf__free will free btf object + * and close the file descriptor. + */ + btf__set_fd(btf, fd); + break; + } + + set_root(false); + return btf; +} + +static struct btf *testmod_btf; +static struct btf *vmlinux_btf; + +static void kfuncs_cleanup(void) +{ + btf__free(testmod_btf); + btf__free(vmlinux_btf); +} + +static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array, + struct kfunc_btf_id_pair *fixup_kfunc_btf_id) +{ + /* Patch in kfunc BTF IDs */ + while (fixup_kfunc_btf_id->kfunc) { + int btf_id = 0; + + /* try to find kfunc in kernel BTF */ + vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf(); + if (vmlinux_btf) { + btf_id = btf__find_by_name_kind(vmlinux_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + + /* kfunc not found in kernel BTF, try bpf_testmod BTF */ + if (!btf_id) { + testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf); + if (testmod_btf) { + btf_id = btf__find_by_name_kind(testmod_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + if (btf_id) { + /* We put bpf_testmod module fd into fd_array + * and its index 1 into instruction 'off'. + */ + *fd_array = btf__fd(testmod_btf); + prog[fixup_kfunc_btf_id->insn_idx].off = 1; + } + } + } + + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } +} + static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, - struct bpf_insn *prog, int *map_fds) + struct bpf_insn *prog, int *map_fds, int *fd_array) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; int *fixup_map_hash_48b = test->fixup_map_hash_48b; @@ -899,7 +1032,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; int *fixup_map_kptr = test->fixup_map_kptr; - struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -1100,25 +1232,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, } while (*fixup_map_kptr); } - /* Patch in kfunc BTF IDs */ - if (fixup_kfunc_btf_id->kfunc) { - struct btf *btf; - int btf_id; - - do { - btf_id = 0; - btf = btf__load_vmlinux_btf(); - if (btf) { - btf_id = btf__find_by_name_kind(btf, - fixup_kfunc_btf_id->kfunc, - BTF_KIND_FUNC); - btf_id = btf_id < 0 ? 0 : btf_id; - } - btf__free(btf); - prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; - fixup_kfunc_btf_id++; - } while (fixup_kfunc_btf_id->kfunc); - } + fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } struct libcap { @@ -1445,6 +1559,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int fd_array[2] = { -1, -1 }; int saved_errno; int fixup_skips; __u32 pflags; @@ -1458,7 +1573,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; fixup_skips = skips; - do_test_fixup(test, prog_type, prog, map_fds); + do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]); if (test->fill_insns) { prog = test->fill_insns; prog_len = test->prog_len; @@ -1492,6 +1607,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, else opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; + if (fd_array[1] != -1) + opts.fd_array = &fd_array[0]; if ((prog_type == BPF_PROG_TYPE_TRACING || prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { @@ -1684,6 +1801,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0; + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose); + + if (load_bpf_testmod(verbose)) + return EXIT_FAILURE; + for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; @@ -1711,6 +1834,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) } } + unload_bpf_testmod(verbose); + kfuncs_cleanup(); + printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, skips, errors); return errors ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 377fb157a57c..c2ad50f26b63 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -68,9 +68,6 @@ # Run with verbose output: # sudo ./test_xsk.sh -v # -# Run and dump packet contents: -# sudo ./test_xsk.sh -D -# # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: # sudo ./test_xsk.sh -d # @@ -81,11 +78,10 @@ ETH="" -while getopts "vDi:d" flag +while getopts "vi:d" flag do case "${flag}" in v) verbose=1;; - D) dump_pkts=1;; d) debug=1;; i) ETH=${OPTARG};; esac @@ -157,10 +153,6 @@ if [[ $verbose -eq 1 ]]; then ARGS+="-v " fi -if [[ $dump_pkts -eq 1 ]]; then - ARGS="-D " -fi - retval=$? test_status $retval "${TEST_NAME}" diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index dc9595ade8de..8d994884c7b4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -9,6 +9,7 @@ #include <bpf/libbpf.h> #include "test_progs.h" #include "testing_helpers.h" +#include <linux/membarrier.h> int parse_num_list(const char *s, bool **num_set, int *num_set_len) { @@ -326,3 +327,63 @@ __u64 read_perf_max_sample_freq(void) fclose(f); return sample_freq; } + +static int finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int delete_module(const char *name, int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +int unload_bpf_testmod(bool verbose) +{ + if (kern_sync_rcu()) + fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); + if (delete_module("bpf_testmod", 0)) { + if (errno == ENOENT) { + if (verbose) + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + return -1; + } + fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + return -1; + } + if (verbose) + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); + return 0; +} + +int load_bpf_testmod(bool verbose) +{ + int fd; + + if (verbose) + fprintf(stdout, "Loading bpf_testmod.ko...\n"); + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) { + fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + return -ENOENT; + } + if (finit_module(fd, "", 0)) { + fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + close(fd); + return -EINVAL; + } + close(fd); + + if (verbose) + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + return 0; +} + +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 98f09bbae86f..5312323881b6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -1,5 +1,9 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (C) 2020 Facebook, Inc. */ + +#ifndef __TESTING_HELPERS_H +#define __TESTING_HELPERS_H + #include <stdbool.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -25,3 +29,8 @@ int parse_test_list_file(const char *path, bool is_glob_pattern); __u64 read_perf_max_sample_freq(void); +int load_bpf_testmod(bool verbose); +int unload_bpf_testmod(bool verbose); +int kern_sync_rcu(void); + +#endif /* __TESTING_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 04ed8b544712..8da8d557768b 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -134,6 +134,11 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); } +static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb) +{ + prod->cached_prod -= nb; +} + static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) { __u32 entries = xsk_cons_nb_avail(cons, nb); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index f144d0604ddf..218d7f694e5c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -76,16 +76,13 @@ #include <asm/barrier.h> #include <linux/if_link.h> #include <linux/if_ether.h> -#include <linux/ip.h> #include <linux/mman.h> -#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> #include <poll.h> #include <pthread.h> #include <signal.h> -#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -94,10 +91,8 @@ #include <sys/socket.h> #include <sys/time.h> #include <sys/types.h> -#include <sys/queue.h> #include <time.h> #include <unistd.h> -#include <stdatomic.h> #include "xsk_xdp_progs.skel.h" #include "xsk.h" @@ -109,10 +104,6 @@ static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; -static const char *IP1 = "192.168.100.162"; -static const char *IP2 = "192.168.100.161"; -static const u16 UDP_PORT1 = 2020; -static const u16 UDP_PORT2 = 2121; static void __exit_with_error(int error, const char *file, const char *func, int line) { @@ -147,112 +138,25 @@ static void report_failure(struct test_spec *test) test->fail = true; } -static void memset32_htonl(void *dest, u32 val, u32 size) -{ - u32 *ptr = (u32 *)dest; - int i; - - val = htonl(val); - - for (i = 0; i < (size & (~0x3)); i += 4) - ptr[i >> 2] = val; -} - -/* - * Fold a partial checksum - * This function code has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_fold(__u32 csum) -{ - u32 sum = (__force u32)csum; - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __u16)~sum; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static u32 from64to32(u64 x) -{ - /* add up 32-bit and 32-bit for 32+c bit */ - x = (x & 0xffffffff) + (x >> 32); - /* add up carry.. */ - x = (x & 0xffffffff) + (x >> 32); - return (u32)x; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - unsigned long long s = (__force u32)sum; - - s += (__force u32)saddr; - s += (__force u32)daddr; -#ifdef __BIG_ENDIAN__ - s += proto + len; -#else - s += (proto + len) << 8; -#endif - return (__force __u32)from64to32(s); -} - -/* - * This function has been taken from - * Linux kernel include/asm-generic/checksum.h +/* The payload is a word consisting of a packet sequence number in the upper + * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. */ -static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); -} - -static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) { - u32 csum = 0; - u32 cnt = 0; - - /* udp hdr and data */ - for (; cnt < len; cnt += 2) - csum += udp_pkt[cnt >> 1]; + u32 *ptr = (u32 *)dest, i; - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); + start /= sizeof(*ptr); + size /= sizeof(*ptr); + for (i = 0; i < size; i++) + ptr[i] = htonl(pkt_nb << 16 | (i + start)); } static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); -} - -static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) -{ - ip_hdr->version = IP_PKT_VER; - ip_hdr->ihl = 0x5; - ip_hdr->tos = IP_PKT_TOS; - ip_hdr->tot_len = htons(IP_PKT_SIZE); - ip_hdr->id = 0; - ip_hdr->frag_off = 0; - ip_hdr->ttl = IPDEFTTL; - ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ifobject->src_ip; - ip_hdr->daddr = ifobject->dst_ip; - ip_hdr->check = 0; -} - -static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, - struct udphdr *udp_hdr) -{ - udp_hdr->source = htons(ifobject->src_port); - udp_hdr->dest = htons(ifobject->dst_port); - udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } static bool is_umem_valid(struct ifobject *ifobj) @@ -260,19 +164,18 @@ static bool is_umem_valid(struct ifobject *ifobj) return !!ifobj->umem->umem; } -static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) +static u32 mode_to_xdp_flags(enum test_mode mode) { - udp_hdr->check = 0; - udp_hdr->check = - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); + return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; } -static u32 mode_to_xdp_flags(enum test_mode mode) +static u64 umem_size(struct xsk_umem_info *umem) { - return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; + return umem->num_frames * umem->frame_size; } -static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) +static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, + u64 size) { struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, @@ -292,9 +195,31 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size return ret; umem->buffer = buffer; + if (ifobj->shared_umem && ifobj->rx_on) { + umem->base_addr = umem_size(umem); + umem->next_buffer = umem_size(umem); + } + return 0; } +static u64 umem_alloc_buffer(struct xsk_umem_info *umem) +{ + u64 addr; + + addr = umem->next_buffer; + umem->next_buffer += umem->frame_size; + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) + umem->next_buffer = umem->base_addr; + + return addr; +} + +static void umem_reset_alloc(struct xsk_umem_info *umem) +{ + umem->next_buffer = 0; +} + static void enable_busy_poll(struct xsk_socket_info *xsk) { int sock_opt; @@ -354,7 +279,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) exit_with_error(ENOMEM); } umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - ret = xsk_configure_umem(umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz); if (ret) exit_with_error(-ret); @@ -380,7 +305,6 @@ out: static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, - {"dump-pkts", no_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -391,7 +315,6 @@ static void usage(const char *prog) " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" - " -D, --dump-pkts Dump packets L2 - L5\n" " -v, --verbose Verbose output\n" " -b, --busy-poll Enable busy poll\n"; @@ -415,7 +338,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); + c = getopt_long(argc, argv, "i:vb", long_options, &option_index); if (c == -1) break; @@ -437,9 +360,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj interface_nb++; break; - case 'D': - opt_pkt_dump = true; - break; case 'v': opt_verbose = true; break; @@ -482,9 +402,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, memset(ifobj->umem, 0, sizeof(*ifobj->umem)); ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - if (ifobj->shared_umem && ifobj->rx_on) - ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS * - XSK_UMEM__DEFAULT_FRAME_SIZE; for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); @@ -554,24 +471,24 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x static void pkt_stream_reset(struct pkt_stream *pkt_stream) { if (pkt_stream) - pkt_stream->rx_pkt_nb = 0; + pkt_stream->current_pkt_nb = 0; } -static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) +static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) { - if (pkt_nb >= pkt_stream->nb_pkts) + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) return NULL; - return &pkt_stream->pkts[pkt_nb]; + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; } static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) { - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { (*pkts_sent)++; - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; - pkt_stream->rx_pkt_nb++; + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; + pkt_stream->current_pkt_nb++; } return NULL; } @@ -616,9 +533,21 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) return pkt_stream; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +static u32 ceil_u32(u32 a, u32 b) +{ + return (a + b - 1) / b; +} + +static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt) +{ + if (!pkt || !pkt->valid) + return 1; + return ceil_u32(pkt->len, frame_size); +} + +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) { - pkt->addr = addr + umem->base_addr; + pkt->offset = offset; pkt->len = len; if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) pkt->valid = false; @@ -626,6 +555,11 @@ static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 l pkt->valid = true; } +static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) +{ + return ceil_u32(len, umem->frame_size) * umem->frame_size; +} + static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; @@ -635,10 +569,13 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb if (!pkt_stream) exit_with_error(ENOMEM); + pkt_stream->nb_pkts = nb_pkts; + pkt_stream->max_pkt_len = pkt_len; for (i = 0; i < nb_pkts; i++) { - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, - pkt_len); - pkt_stream->pkts[i].payload = i; + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt_set(umem, pkt, 0, pkt_len); + pkt->pkt_nb = i; } return pkt_stream; @@ -669,8 +606,7 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); + pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); ifobj->pkt_stream = pkt_stream; } @@ -694,30 +630,31 @@ static void pkt_stream_receive_half(struct test_spec *test) pkt_stream->pkts[i].valid = false; } -static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) +static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) { - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); - struct udphdr *udp_hdr; - struct ethhdr *eth_hdr; - struct iphdr *ip_hdr; - void *data; + if (!pkt->valid) + return pkt->offset; + return pkt->offset + umem_alloc_buffer(umem); +} - if (!pkt) - return NULL; - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) - return pkt; +static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, + u32 bytes_written) +{ + void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); - eth_hdr = (struct ethhdr *)data; + if (len < MIN_PKT_SIZE) + return; - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); - gen_ip_hdr(ifobject, ip_hdr); - gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr(ifobject, eth_hdr); + if (!bytes_written) { + gen_eth_hdr(ifobject, data); - return pkt; + len -= PKT_HDR_SIZE; + data += PKT_HDR_SIZE; + } else { + bytes_written -= PKT_HDR_SIZE; + } + + write_payload(data, pkt_nb, bytes_written, len); } static void __pkt_stream_generate_custom(struct ifobject *ifobj, @@ -731,10 +668,14 @@ static void __pkt_stream_generate_custom(struct ifobject *ifobj, exit_with_error(ENOMEM); for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr; - pkt_stream->pkts[i].len = pkts[i].len; - pkt_stream->pkts[i].payload = i; - pkt_stream->pkts[i].valid = pkts[i].valid; + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt->offset = pkts[i].offset; + pkt->len = pkts[i].len; + pkt->pkt_nb = i; + pkt->valid = pkts[i].valid; + if (pkt->len > pkt_stream->max_pkt_len) + pkt_stream->max_pkt_len = pkt->len; } ifobj->pkt_stream = pkt_stream; @@ -746,53 +687,62 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts); } -static void pkt_dump(void *pkt, u32 len) -{ - char s[INET_ADDRSTRLEN]; - struct ethhdr *ethhdr; - struct udphdr *udphdr; - struct iphdr *iphdr; - u32 payload, i; - - ethhdr = pkt; - iphdr = pkt + sizeof(*ethhdr); - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); - - /*extract L2 frame */ - fprintf(stdout, "DEBUG>> L2: dst mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_dest[i]); - - fprintf(stdout, "\nDEBUG>> L2: src mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_source[i]); - - /*extract L3 frame */ - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); - /*extract L4 frame */ - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); - /*extract L5 frame */ - payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); +static void pkt_print_data(u32 *data, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + u32 seqnum, pkt_nb; - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); - fprintf(stdout, "---------------------------------------\n"); + seqnum = ntohl(*data) & 0xffff; + pkt_nb = ntohl(*data) >> 16; + fprintf(stdout, "%u:%u ", pkt_nb, seqnum); + data++; + } } -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, - u64 pkt_stream_addr) +static void pkt_dump(void *pkt, u32 len, bool eth_header) +{ + struct ethhdr *ethhdr = pkt; + u32 i, *data; + + if (eth_header) { + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_source[i]); + + data = pkt + PKT_HDR_SIZE; + } else { + data = pkt; + } + + /*extract L5 frame */ + fprintf(stdout, "\nDEBUG>> L5: seqnum: "); + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); + fprintf(stdout, "...."); + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { + fprintf(stdout, "\n.... "); + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, + PKT_DUMP_NB_TO_PRINT); + } + fprintf(stdout, "\n---------------------------------------\n"); +} + +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) { u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset = 0; + u32 offset = addr % umem->frame_size, expected_offset; + int pkt_offset = pkt->valid ? pkt->offset : 0; - if (!pkt_stream->use_addr_for_fill) - pkt_stream_addr = 0; + if (!umem->unaligned_mode) + pkt_offset = 0; - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; if (offset == expected_offset) return true; @@ -806,9 +756,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) void *data = xsk_umem__get_data(buffer, addr); struct xdp_info *meta = data - sizeof(struct xdp_info); - if (meta->count != pkt->payload) { + if (meta->count != pkt->pkt_nb) { ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->payload, meta->count); + __func__, pkt->pkt_nb, meta->count); return false; } @@ -818,11 +768,11 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) { void *data = xsk_umem__get_data(buffer, addr); - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + u32 seqnum, pkt_data; if (!pkt) { ksft_print_msg("[%s] too many packets received\n", __func__); - return false; + goto error; } if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { @@ -833,28 +783,23 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) if (pkt->len != len) { ksft_print_msg("[%s] expected length [%d], got length [%d]\n", __func__, pkt->len, len); - return false; + goto error; } - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); - - if (opt_pkt_dump) - pkt_dump(data, PKT_SIZE); + pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); + seqnum = pkt_data >> 16; - if (pkt->payload != seqnum) { - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); - return false; - } - } else { - ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, - iphdr->tos); - return false; + if (pkt->pkt_nb != seqnum) { + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", + __func__, pkt->pkt_nb, seqnum); + goto error; } return true; + +error: + pkt_dump(data, len, true); + return false; } static void kick_tx(struct xsk_socket_info *xsk) @@ -976,7 +921,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) addr = xsk_umem__add_offset_to_addr(addr); if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || - !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) return TEST_FAILURE; @@ -992,8 +937,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) pthread_mutex_lock(&pacing_mutex); pkts_in_flight -= pkts_sent; - if (pkts_in_flight < umem->num_frames) - pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); pkts_sent = 0; } @@ -1001,14 +944,21 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, - bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) { struct xsk_socket_info *xsk = ifobject->xsk; + struct xsk_umem_info *umem = ifobject->umem; + u32 i, idx = 0, valid_pkts = 0, buffer_len; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, valid_pkts = 0; int ret; + buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len); + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { + kick_tx(xsk); + return TEST_CONTINUE; + } + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { ret = poll(fds, 1, POLL_TMOUT); @@ -1034,25 +984,21 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); + struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream); if (!pkt) break; - tx_desc->addr = pkt->addr; + tx_desc->addr = pkt_get_addr(pkt, umem); tx_desc->len = pkt->len; - (*pkt_nb)++; - if (pkt->valid) + if (pkt->valid) { valid_pkts++; + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0); + } } pthread_mutex_lock(&pacing_mutex); pkts_in_flight += valid_pkts; - /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { - kick_tx(xsk); - pthread_cond_wait(&pacing_cond, &pacing_mutex); - } pthread_mutex_unlock(&pacing_mutex); xsk_ring_prod__submit(&xsk->tx, i); @@ -1088,18 +1034,21 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { + struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); struct pollfd fds = { }; - u32 pkt_cnt = 0, ret; + u32 ret; fds.fd = xsk_socket__fd(ifobject->xsk->xsk); fds.events = POLLOUT; - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { + ret = __send_pkts(ifobject, &fds, timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; if ((ret || test->fail) && !timeout) return TEST_FAILURE; - else if (ret == TEST_PASS && timeout) + if (ret == TEST_PASS && timeout) return ret; } @@ -1249,11 +1198,14 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje ifobject->xsk = &ifobject->xsk_arr[0]; ifobject->xskmap = test->ifobj_rx->xskmap; memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); + ifobject->umem->base_addr = 0; } -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) +static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, + bool fill_up) { - u32 idx = 0, i, buffers_to_fill; + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; int ret; if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) @@ -1264,22 +1216,33 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); if (ret != buffers_to_fill) exit_with_error(ENOSPC); - for (i = 0; i < buffers_to_fill; i++) { - u64 addr; - if (pkt_stream->use_addr_for_fill) { - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); + while (filled < buffers_to_fill) { + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); + u64 addr; + u32 i; + + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) { + if (!pkt) { + if (!fill_up) + break; + addr = filled * umem->frame_size + umem->base_addr; + } else if (pkt->offset >= 0) { + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); + } else { + addr = pkt->offset + umem_alloc_buffer(umem); + } - if (!pkt) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + if (++filled >= buffers_to_fill) break; - addr = pkt->addr; - } else { - addr = i * umem->frame_size; } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; } - xsk_ring_prod__submit(&umem->fq, i); + xsk_ring_prod__submit(&umem->fq, filled); + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); + + pkt_stream_reset(pkt_stream); + umem_reset_alloc(umem); } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -1300,12 +1263,10 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (bufs == MAP_FAILED) exit_with_error(errno); - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); if (ret) exit_with_error(-ret); - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); - xsk_configure_socket(test, ifobject, ifobject->umem, false); ifobject->xsk = &ifobject->xsk_arr[0]; @@ -1313,6 +1274,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); if (ret) exit_with_error(errno); @@ -1370,12 +1333,8 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) { + if (err) report_failure(test); - pthread_mutex_lock(&pacing_mutex); - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); - } pthread_exit(NULL); } @@ -1402,11 +1361,20 @@ static void handler(int signum) pthread_exit(NULL); } -static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) +static bool xdp_prog_changed_rx(struct test_spec *test) { + struct ifobject *ifobj = test->ifobj_rx; + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; } +static bool xdp_prog_changed_tx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_tx; + + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; +} + static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, struct bpf_map *xskmap, enum test_mode mode) { @@ -1433,13 +1401,13 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, struct ifobject *ifobj_tx) { - if (xdp_prog_changed(test, ifobj_rx)) + if (xdp_prog_changed_rx(test)) xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); if (!ifobj_tx || ifobj_tx->shared_umem) return; - if (xdp_prog_changed(test, ifobj_tx)) + if (xdp_prog_changed_tx(test)) xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); } @@ -1448,9 +1416,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i { pthread_t t0, t1; - if (ifobj2) + if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); + pkt_stream_reset(ifobj2->pkt_stream); + } test->current_step++; pkt_stream_reset(ifobj1->pkt_stream); @@ -1493,6 +1463,12 @@ static int testapp_validate_traffic(struct test_spec *test) struct ifobject *ifobj_rx = test->ifobj_rx; struct ifobject *ifobj_tx = test->ifobj_tx; + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { + ksft_test_result_skip("No huge pages present.\n"); + return TEST_SKIP; + } + xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); } @@ -1502,16 +1478,18 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct return __testapp_validate_traffic(test, ifobj, NULL); } -static void testapp_teardown(struct test_spec *test) +static int testapp_teardown(struct test_spec *test) { int i; test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; test_spec_reset(test); } + + return TEST_PASS; } static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) @@ -1526,20 +1504,23 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) *ifobj2 = tmp_ifobj; } -static void testapp_bidi(struct test_spec *test) +static int testapp_bidi(struct test_spec *test) { + int res; + test_spec_set_name(test, "BIDIRECTIONAL"); test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; print_verbose("Switching Tx/Rx vectors\n"); swap_directions(&test->ifobj_rx, &test->ifobj_tx); - __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); swap_directions(&test->ifobj_rx, &test->ifobj_tx); + return res; } static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) @@ -1556,160 +1537,139 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj exit_with_error(errno); } -static void testapp_bpf_res(struct test_spec *test) +static int testapp_bpf_res(struct test_spec *test) { test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_headroom(struct test_spec *test) +static int testapp_headroom(struct test_spec *test) { test_spec_set_name(test, "UMEM_HEADROOM"); test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_dropped(struct test_spec *test) +static int testapp_stats_rx_dropped(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_DROPPED"); + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); + return TEST_SKIP; + } + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; pkt_stream_receive_half(test); test->ifobj_rx->validation_func = validate_rx_dropped; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_tx_invalid_descs(struct test_spec *test) +static int testapp_stats_tx_invalid_descs(struct test_spec *test) { test_spec_set_name(test, "STAT_TX_INVALID"); pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); test->ifobj_tx->validation_func = validate_tx_invalid_descs; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_full(struct test_spec *test) +static int testapp_stats_rx_full(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FULL"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; test->ifobj_rx->validation_func = validate_rx_full; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_fill_empty(struct test_spec *test) +static int testapp_stats_fill_empty(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; - testapp_validate_traffic(test); -} - -/* Simple test */ -static bool hugepages_present(struct ifobject *ifobject) -{ - size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; - void *bufs; - - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0); - if (bufs == MAP_FAILED) - return false; - - mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; - munmap(bufs, mmap_sz); - return true; + return testapp_validate_traffic(test); } -static bool testapp_unaligned(struct test_spec *test) +static int testapp_unaligned(struct test_spec *test) { - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return false; - } - test_spec_set_name(test, "UNALIGNED_MODE"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); + /* Let half of the packets straddle a 4K buffer boundary */ + pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); - return true; + return testapp_validate_traffic(test); } -static void testapp_single_pkt(struct test_spec *test) +static int testapp_single_pkt(struct test_spec *test) { - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_invalid_desc(struct test_spec *test) +static int testapp_invalid_desc(struct test_spec *test) { - u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; struct pkt pkts[] = { /* Zero packet address allowed */ - {0, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Allowed packet */ - {0x1000, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Straddling the start of umem */ - {-2, PKT_SIZE, 0, false}, + {-2, MIN_PKT_SIZE, 0, false}, /* Packet too large */ - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, /* Up to end of umem allowed */ - {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, /* After umem ends */ - {umem_size, PKT_SIZE, 0, false}, + {umem_size, MIN_PKT_SIZE, 0, false}, /* Straddle the end of umem */ - {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a page boundrary */ - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a 2K boundrary */ - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 4K boundary */ + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 2K boundary */ + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, /* Valid packet for synch so that something is received */ - {0x4000, PKT_SIZE, 0, true}}; + {0, MIN_PKT_SIZE, 0, true}}; - if (test->ifobj_tx->umem->unaligned_mode) { - /* Crossing a page boundrary allowed */ + if (umem->unaligned_mode) { + /* Crossing a page boundary allowed */ pkts[7].valid = true; } - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { - /* Crossing a 2K frame size boundrary not allowed */ + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundary not allowed */ pkts[8].valid = false; } if (test->ifobj_tx->shared_umem) { - pkts[4].addr += umem_size; - pkts[5].addr += umem_size; - pkts[6].addr += umem_size; + pkts[4].offset += umem_size; + pkts[5].offset += umem_size; + pkts[6].offset += umem_size; } pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_drop(struct test_spec *test) +static int testapp_xdp_drop(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1719,10 +1679,10 @@ static void testapp_xdp_drop(struct test_spec *test) skel_rx->maps.xsk, skel_tx->maps.xsk); pkt_stream_receive_half(test); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_metadata_count(struct test_spec *test) +static int testapp_xdp_metadata_count(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1743,10 +1703,10 @@ static void testapp_xdp_metadata_count(struct test_spec *test) if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) exit_with_error(errno); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_poll_txq_tmout(struct test_spec *test) +static int testapp_poll_txq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_TXQ_FULL"); @@ -1754,14 +1714,14 @@ static void testapp_poll_txq_tmout(struct test_spec *test) /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ test->ifobj_tx->umem->frame_size = 2048; pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); - testapp_validate_traffic_single_thread(test, test->ifobj_tx); + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); } -static void testapp_poll_rxq_tmout(struct test_spec *test) +static int testapp_poll_rxq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_RXQ_EMPTY"); test->ifobj_rx->use_poll = true; - testapp_validate_traffic_single_thread(test, test->ifobj_rx); + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); } static int xsk_load_xdp_programs(struct ifobject *ifobj) @@ -1778,25 +1738,30 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj) xsk_xdp_progs__destroy(ifobj->xdp_progs); } +/* Simple test */ +static bool hugepages_present(void) +{ + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); + if (bufs == MAP_FAILED) + return false; + + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + munmap(bufs, mmap_sz); + return true; +} + static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - const char *dst_ip, const char *src_ip, const u16 dst_port, - const u16 src_port, thread_func_t func_ptr) + thread_func_t func_ptr) { - struct in_addr ip; int err; memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - inet_aton(dst_ip, &ip); - ifobj->dst_ip = ip.s_addr; - - inet_aton(src_ip, &ip); - ifobj->src_ip = ip.s_addr; - - ifobj->dst_port = dst_port; - ifobj->src_port = src_port; - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); @@ -1804,94 +1769,87 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * printf("Error loading XDP program\n"); exit_with_error(err); } + + if (hugepages_present()) + ifobj->unaligned_supp = true; } static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { + int ret = TEST_SKIP; + switch (type) { case TEST_TYPE_STATS_RX_DROPPED: - if (mode == TEST_MODE_ZC) { - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); - return; - } - testapp_stats_rx_dropped(test); + ret = testapp_stats_rx_dropped(test); break; case TEST_TYPE_STATS_TX_INVALID_DESCS: - testapp_stats_tx_invalid_descs(test); + ret = testapp_stats_tx_invalid_descs(test); break; case TEST_TYPE_STATS_RX_FULL: - testapp_stats_rx_full(test); + ret = testapp_stats_rx_full(test); break; case TEST_TYPE_STATS_FILL_EMPTY: - testapp_stats_fill_empty(test); + ret = testapp_stats_fill_empty(test); break; case TEST_TYPE_TEARDOWN: - testapp_teardown(test); + ret = testapp_teardown(test); break; case TEST_TYPE_BIDI: - testapp_bidi(test); + ret = testapp_bidi(test); break; case TEST_TYPE_BPF_RES: - testapp_bpf_res(test); + ret = testapp_bpf_res(test); break; case TEST_TYPE_RUN_TO_COMPLETION: test_spec_set_name(test, "RUN_TO_COMPLETION"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - testapp_single_pkt(test); + ret = testapp_single_pkt(test); break; case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); - testapp_validate_traffic(test); + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; test_spec_set_name(test, "POLL_RX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_TX_POLL: test->ifobj_tx->use_poll = true; test_spec_set_name(test, "POLL_TX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_POLL_TXQ_TMOUT: - testapp_poll_txq_tmout(test); + ret = testapp_poll_txq_tmout(test); break; case TEST_TYPE_POLL_RXQ_TMOUT: - testapp_poll_rxq_tmout(test); + ret = testapp_poll_rxq_tmout(test); break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC: - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { u64 page_size, umem_size; - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); /* Odd frame size so the UMEM doesn't end near a page boundary. */ test->ifobj_tx->umem->frame_size = 4001; @@ -1903,29 +1861,28 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ */ page_size = sysconf(_SC_PAGESIZE); umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; - assert(umem_size % page_size > PKT_SIZE); - assert(umem_size % page_size < page_size - PKT_SIZE); - testapp_invalid_desc(test); + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + ret = testapp_invalid_desc(test); break; } case TEST_TYPE_UNALIGNED: - if (!testapp_unaligned(test)) - return; + ret = testapp_unaligned(test); break; case TEST_TYPE_HEADROOM: - testapp_headroom(test); + ret = testapp_headroom(test); break; case TEST_TYPE_XDP_DROP_HALF: - testapp_xdp_drop(test); + ret = testapp_xdp_drop(test); break; case TEST_TYPE_XDP_METADATA_COUNT: - testapp_xdp_metadata_count(test); + ret = testapp_xdp_metadata_count(test); break; default: break; } - if (!test->fail) + if (ret == TEST_PASS) ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), test->name); pkt_stream_restore_default(test); @@ -2030,14 +1987,12 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_tx); + init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); + init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE); + tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index c535aeab2ca3..aaf27e067640 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -30,22 +30,14 @@ #define TEST_PASS 0 #define TEST_FAILURE -1 #define TEST_CONTINUE 1 +#define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 #define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 32 #define MAX_TEARDOWN_ITER 10 -#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) -#define MIN_ETH_PKT_SIZE 64 -#define ETH_FCS_SIZE 4 -#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) -#define PKT_SIZE (MIN_PKT_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) -#define IP_PKT_VER 0x4 -#define IP_PKT_TOS 0x9 -#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ +#define MIN_PKT_SIZE 64 #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 @@ -57,6 +49,7 @@ #define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) #define HUGEPAGE_SIZE (2 * 1024 * 1024) +#define PKT_DUMP_NB_TO_PRINT 16 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -93,13 +86,13 @@ enum test_type { TEST_TYPE_MAX }; -static bool opt_pkt_dump; static bool opt_verbose; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; + u64 next_buffer; u32 num_frames; u32 frame_headroom; void *buffer; @@ -118,17 +111,17 @@ struct xsk_socket_info { }; struct pkt { - u64 addr; + int offset; u32 len; - u32 payload; + u32 pkt_nb; bool valid; }; struct pkt_stream { u32 nb_pkts; - u32 rx_pkt_nb; + u32 current_pkt_nb; struct pkt *pkts; - bool use_addr_for_fill; + u32 max_pkt_len; }; struct ifobject; @@ -148,11 +141,7 @@ struct ifobject { struct bpf_program *xdp_prog; enum test_mode mode; int ifindex; - u32 dst_ip; - u32 src_ip; u32 bind_flags; - u16 src_port; - u16 dst_port; bool tx_on; bool rx_on; bool use_poll; @@ -161,6 +150,7 @@ struct ifobject { bool release_rx; bool shared_umem; bool use_metadata; + bool unaligned_supp; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -184,7 +174,6 @@ struct test_spec { pthread_barrier_t barr; pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; int pkts_in_flight; |