diff options
Diffstat (limited to 'tools/testing')
113 files changed, 5622 insertions, 1127 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 90f3c9802ffb..95dc58b94178 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,5 +62,6 @@ cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o +cxl_core-y += cxl_core_exports.o obj-m += test/ diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c new file mode 100644 index 000000000000..077e6883921d --- /dev/null +++ b/tools/testing/cxl/cxl_core_exports.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include "cxl.h" + +/* Exporting of cxl_core symbols that are only used by cxl_test */ +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index b88546299902..f4e517a0c774 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -669,10 +669,11 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; dev_dbg(&port->dev, "%s commit\n", dev_name(&cxld->dev)); - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 134f8eab0768..71546e15bdd3 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -17,10 +17,10 @@ enum zone_type { }; #define MAX_NR_ZONES __MAX_NR_ZONES -#define MAX_ORDER 10 -#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) +#define MAX_PAGE_ORDER 10 +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) -#define pageblock_order MAX_ORDER +#define pageblock_order MAX_PAGE_ORDER #define pageblock_nr_pages BIT(pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index fd26189d53be..b8419f460368 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -924,7 +924,7 @@ static __init int ndtest_init(void) nfit_test_setup(ndtest_resource_lookup, NULL); - rc = class_regster(&ndtest_dimm_class); + rc = class_register(&ndtest_dimm_class); if (rc) goto err_register; diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 61fe2601cb3a..4eb442206d01 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -93,13 +93,9 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, return p; } -void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +void __kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) { assert(objp); - uatomic_dec(&nr_allocated); - uatomic_dec(&cachep->nr_allocated); - if (kmalloc_verbose) - printf("Freeing %p to slab\n", objp); if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); @@ -111,6 +107,15 @@ void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) } } +void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +{ + uatomic_dec(&nr_allocated); + uatomic_dec(&cachep->nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to slab\n", objp); + __kmem_cache_free_locked(cachep, objp); +} + void kmem_cache_free(struct kmem_cache *cachep, void *objp) { pthread_mutex_lock(&cachep->lock); @@ -141,18 +146,17 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, if (kmalloc_verbose) pr_debug("Bulk alloc %lu\n", size); - if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (cachep->non_kernel < size) - return 0; - - cachep->non_kernel -= size; - } - pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { struct radix_tree_node *node; for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + node = cachep->objs; cachep->nr_objs--; cachep->objs = node->parent; @@ -163,11 +167,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } else { pthread_mutex_unlock(&cachep->lock); for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + if (cachep->align) { posix_memalign(&p[i], cachep->align, cachep->size); } else { p[i] = malloc(cachep->size); + if (!p[i]) + break; } if (cachep->ctor) cachep->ctor(p[i]); @@ -176,6 +188,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } } + if (i < size) { + size = i; + pthread_mutex_lock(&cachep->lock); + for (i = 0; i < size; i++) + __kmem_cache_free_locked(cachep, p[i]); + pthread_mutex_unlock(&cachep->lock); + return 0; + } + for (i = 0; i < size; i++) { uatomic_inc(&nr_allocated); uatomic_inc(&cachep->nr_allocated); diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h index 7d8d1f445b89..06c89bdcc515 100644 --- a/tools/testing/radix-tree/linux/maple_tree.h +++ b/tools/testing/radix-tree/linux/maple_tree.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0+ */ #define atomic_t int32_t -#include "../../../../include/linux/maple_tree.h" #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) #define atomic_set(x, y) do {} while (0) #define U8_MAX UCHAR_MAX +#include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index e5da1cad70ba..f1caf4bcf937 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -118,6 +118,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.alloc == NULL); MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); mas_push_node(&mas, mn); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* free */ mtree_unlock(mt); @@ -141,7 +142,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - mas.node = MAS_START; + mas.status = ma_start; mas_nomem(&mas, GFP_KERNEL); /* Allocate 3 nodes, will fail. */ mas_node_count(&mas, 3); @@ -158,6 +159,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) /* Ensure we counted 3. */ MT_BUG_ON(mt, mas_allocated(&mas) != 3); /* Free. */ + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Set allocation request to 1. */ @@ -272,6 +274,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); } + mas_reset(&mas); MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); } @@ -294,6 +297,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) smn = smn->slot[0]; /* next. */ } MT_BUG_ON(mt, mas_allocated(&mas) != total); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Free. */ MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -441,7 +445,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10); mas_destroy(&mas); @@ -452,7 +456,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); @@ -941,10 +945,11 @@ retry: ret = mas_descend_walk(mas, range_min, range_max); if (unlikely(mte_dead_node(mas->node))) { - mas->node = MAS_START; + mas->status = ma_start; goto retry; } + mas->end = mas_data_end(mas); return ret; not_found: @@ -960,17 +965,19 @@ static inline void *mas_range_load(struct ma_state *mas, unsigned long index = mas->index; if (mas_is_none(mas) || mas_is_paused(mas)) - mas->node = MAS_START; + mas->status = ma_start; retry: if (mas_tree_walk(mas, range_min, range_max)) - if (unlikely(mas->node == MAS_ROOT)) + if (unlikely(mas->status == ma_root)) return mas_root(mas); if (likely(mas->offset != MAPLE_NODE_SLOTS)) entry = mas_get_slot(mas, mas->offset); - if (mas_dead_node(mas, index)) + if (mas_is_active(mas) && mte_dead_node(mas->node)) { + mas_set(mas, index); goto retry; + } return entry; } @@ -34132,7 +34139,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set27, ARRAY_SIZE(set27)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 140415537422336)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 140415537422336)); mt_set_non_kernel(0); mt_validate(mt); mtree_destroy(mt); @@ -34256,7 +34263,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set37, ARRAY_SIZE(set37)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); @@ -34264,7 +34271,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set38, ARRAY_SIZE(set38)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); @@ -35336,7 +35343,7 @@ static void mas_dfs_preorder(struct ma_state *mas) unsigned char end, slot = 0; unsigned long *pivots; - if (mas->node == MAS_START) { + if (mas->status == ma_start) { mas_start(mas); return; } @@ -35373,7 +35380,7 @@ walk_up: return; done: - mas->node = MAS_NONE; + mas->status = ma_none; } @@ -35538,7 +35545,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1); + MT_BUG_ON(mt, allocated != 0); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -35832,7 +35839,7 @@ static noinline void __init check_nomem(struct maple_tree *mt) mas_store(&ms, &ms); /* insert 1 -> &ms, fails. */ MT_BUG_ON(mt, ms.node != MA_ERROR(-ENOMEM)); mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */ - MT_BUG_ON(mt, ms.node != MAS_START); + MT_BUG_ON(mt, ms.status != ma_start); mtree_unlock(mt); MT_BUG_ON(mt, mtree_insert(mt, 2, mt, GFP_KERNEL) != 0); mtree_lock(mt); @@ -35857,6 +35864,363 @@ static noinline void __init check_locky(struct maple_tree *mt) mt_clear_in_rcu(mt); } +/* + * Compares two nodes except for the addresses stored in the nodes. + * Returns zero if they are the same, otherwise returns non-zero. + */ +static int __init compare_node(struct maple_enode *enode_a, + struct maple_enode *enode_b) +{ + struct maple_node *node_a, *node_b; + struct maple_node a, b; + void **slots_a, **slots_b; /* Do not use the rcu tag. */ + enum maple_type type; + int i; + + if (((unsigned long)enode_a & MAPLE_NODE_MASK) != + ((unsigned long)enode_b & MAPLE_NODE_MASK)) { + pr_err("The lower 8 bits of enode are different.\n"); + return -1; + } + + type = mte_node_type(enode_a); + node_a = mte_to_node(enode_a); + node_b = mte_to_node(enode_b); + a = *node_a; + b = *node_b; + + /* Do not compare addresses. */ + if (ma_is_root(node_a) || ma_is_root(node_b)) { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MA_ROOT_PARENT); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MA_ROOT_PARENT); + } else { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MAPLE_NODE_MASK); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MAPLE_NODE_MASK); + } + + if (a.parent != b.parent) { + pr_err("The lower 8 bits of parents are different. %p %p\n", + a.parent, b.parent); + return -1; + } + + /* + * If it is a leaf node, the slots do not contain the node address, and + * no special processing of slots is required. + */ + if (ma_is_leaf(type)) + goto cmp; + + slots_a = ma_slots(&a, type); + slots_b = ma_slots(&b, type); + + for (i = 0; i < mt_slots[type]; i++) { + if (!slots_a[i] && !slots_b[i]) + break; + + if (!slots_a[i] || !slots_b[i]) { + pr_err("The number of slots is different.\n"); + return -1; + } + + /* Do not compare addresses in slots. */ + ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK; + ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK; + } + +cmp: + /* + * Compare all contents of two nodes, including parent (except address), + * slots (except address), pivots, gaps and metadata. + */ + return memcmp(&a, &b, sizeof(struct maple_node)); +} + +/* + * Compare two trees and return 0 if they are the same, non-zero otherwise. + */ +static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) +{ + MA_STATE(mas_a, mt_a, 0, 0); + MA_STATE(mas_b, mt_b, 0, 0); + + if (mt_a->ma_flags != mt_b->ma_flags) { + pr_err("The flags of the two trees are different.\n"); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + + if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { + if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { + pr_err("One is ma_root and the other is not.\n"); + return -1; + } + return 0; + } + + while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { + + if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { + pr_err("One is ma_none and the other is not.\n"); + return -1; + } + + if (mas_a.min != mas_b.min || + mas_a.max != mas_b.max) { + pr_err("mas->min, mas->max do not match.\n"); + return -1; + } + + if (compare_node(mas_a.node, mas_b.node)) { + pr_err("The contents of nodes %p and %p are different.\n", + mas_a.node, mas_b.node); + mt_dump(mt_a, mt_dump_dec); + mt_dump(mt_b, mt_dump_dec); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + } + + return 0; +} + +static __init void mas_subtree_max_range(struct ma_state *mas) +{ + unsigned long limit = mas->max; + MA_STATE(newmas, mas->tree, 0, 0); + void *entry; + + mas_for_each(mas, entry, limit) { + if (mas->last - mas->index >= + newmas.last - newmas.index) { + newmas = *mas; + } + } + + *mas = newmas; +} + +/* + * build_full_tree() - Build a full tree. + * @mt: The tree to build. + * @flags: Use @flags to build the tree. + * @height: The height of the tree to build. + * + * Build a tree with full leaf nodes and internal nodes. Note that the height + * should not exceed 3, otherwise it will take a long time to build. + * Return: zero if the build is successful, non-zero if it fails. + */ +static __init int build_full_tree(struct maple_tree *mt, unsigned int flags, + int height) +{ + MA_STATE(mas, mt, 0, 0); + unsigned long step; + int ret = 0, cnt = 1; + enum maple_type type; + + mt_init_flags(mt, flags); + mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL); + + mtree_lock(mt); + + while (1) { + mas_set(&mas, 0); + if (mt_height(mt) < height) { + mas.max = ULONG_MAX; + goto store; + } + + while (1) { + mas_dfs_preorder(&mas); + if (mas_is_none(&mas)) + goto unlock; + + type = mte_node_type(mas.node); + if (mas_data_end(&mas) + 1 < mt_slots[type]) { + mas_set(&mas, mas.min); + goto store; + } + } +store: + mas_subtree_max_range(&mas); + step = mas.last - mas.index; + if (step < 1) { + ret = -1; + goto unlock; + } + + step /= 2; + mas.last = mas.index + step; + mas_store_gfp(&mas, xa_mk_value(5), + GFP_KERNEL); + ++cnt; + } +unlock: + mtree_unlock(mt); + + MT_BUG_ON(mt, mt_height(mt) != height); + /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */ + return ret; +} + +static noinline void __init check_mtree_dup(struct maple_tree *mt) +{ + DEFINE_MTREE(new); + int i, j, ret, count = 0; + unsigned int rand_seed = 17, rand; + + /* store a value at [0, 0] */ + mt_init_flags(mt, 0); + mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + + /* The two trees have different attributes. */ + mt_init_flags(mt, 0); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* The new tree is not empty */ + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Test for duplicating full trees. */ + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, 0, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, 0); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test for normal duplicating. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test memory allocation failed. */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i < 30; i += 3) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + /* Failed at the first node. */ + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(0); + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + MT_BUG_ON(&new, ret != -ENOMEM); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Random maple tree fails at a random node. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + /* + * The rand() library function is not used, so we can generate + * the same random numbers on any platform. + */ + rand_seed = rand_seed * 1103515245 + 12345; + rand = rand_seed / 65536 % 128; + mt_set_non_kernel(rand); + + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + if (ret != 0) { + MT_BUG_ON(&new, ret != -ENOMEM); + count++; + mtree_destroy(mt); + continue; + } + + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* pr_info("mtree_dup() fail %d times\n", count); */ + BUG_ON(!count); +} + extern void test_kmem_cache_bulk(void); void farmer_tests(void) @@ -35904,6 +36268,10 @@ void farmer_tests(void) check_null_expand(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, 0); + check_mtree_dup(&tree); + mtree_destroy(&tree); + /* RCU testing */ mt_init_flags(&tree, 0); check_erase_testset(&tree); @@ -35938,7 +36306,9 @@ void farmer_tests(void) void maple_tree_tests(void) { +#if !defined(BENCH) farmer_tests(); +#endif maple_tree_seed(); maple_tree_harvest(); } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..601049886963 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -26,6 +26,8 @@ TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll TARGETS += filesystems/fat +TARGETS += filesystems/overlayfs +TARGETS += filesystems/statmount TARGETS += firmware TARGETS += fpu TARGETS += ftrace @@ -43,6 +45,7 @@ TARGETS += landlock TARGETS += lib TARGETS += livepatch TARGETS += lkdtm +TARGETS += lsm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug @@ -155,12 +158,10 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include - KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include - KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -174,7 +175,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: kernel_header_files +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -185,23 +186,6 @@ all: kernel_header_files ret=$$((ret * $$?)); \ done; exit $$ret; -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files - run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 21e482b23f50..23df154fcdd7 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -138,7 +138,7 @@ static void find_controls(void) err = snd_ctl_elem_info(card_data->handle, ctl_data->info); if (err < 0) { - ksft_print_msg("%s getting info for %d\n", + ksft_print_msg("%s getting info for %s\n", snd_strerror(err), ctl_data->name); } diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 351a098b503a..02ee3a91b780 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -254,6 +254,12 @@ static int write_clone_read(void) putnum(++tests_run); \ putstr(" " #name "\n"); +#define skip_test(name) \ + tests_skipped++; \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" # SKIP " #name "\n"); + int main(int argc, char **argv) { int ret, i; @@ -283,13 +289,11 @@ int main(int argc, char **argv) } else { putstr("# SME support not present\n"); - for (i = 0; i < EXPECTED_TESTS; i++) { - putstr("ok "); - putnum(i); - putstr(" skipped, TPIDR2 not supported\n"); - } - - tests_skipped += EXPECTED_TESTS; + skip_test(default_value); + skip_test(write_read); + skip_test(write_sleep_read); + skip_test(write_fork_read); + skip_test(write_clone_read); } print_summary(); diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 547d077e3517..fff60e2a25ad 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -515,6 +515,10 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size +#ifdef SSVE + mrs x13, S3_3_C4_C2_2 +#endif + puts "Mismatch: PID=" mov x0, x20 bl putdec @@ -534,6 +538,12 @@ function barf bl dumphex puts "]\n" +#ifdef SSVE + puts "\tSVCR: " + mov x0, x13 + bl putdecn +#endif + mov x8, #__NR_getpid svc #0 // fpsimd.c acitivty log dump hack diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 5f648b97a06f..ea9c7d47790f 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -66,6 +66,11 @@ static struct vec_data vec_data[] = { }, }; +static bool vec_type_supported(struct vec_data *data) +{ + return getauxval(data->hwcap_type) & data->hwcap; +} + static int stdio_read_integer(FILE *f, const char *what, int *val) { int n = 0; @@ -564,8 +569,11 @@ static void prctl_set_all_vqs(struct vec_data *data) return; } - for (i = 0; i < ARRAY_SIZE(vec_data); i++) + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + if (!vec_type_supported(&vec_data[i])) + continue; orig_vls[i] = vec_data[i].rdvl(); + } for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); @@ -594,7 +602,7 @@ static void prctl_set_all_vqs(struct vec_data *data) if (&vec_data[i] == data) continue; - if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap)) + if (!vec_type_supported(&vec_data[i])) continue; if (vec_data[i].rdvl() != orig_vls[i]) { @@ -765,7 +773,7 @@ int main(void) struct vec_data *data = &vec_data[i]; unsigned long supported; - supported = getauxval(data->hwcap_type) & data->hwcap; + supported = vec_type_supported(data); if (!supported) all_supported = false; diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c index b86cb1049497..587b94648222 100644 --- a/tools/testing/selftests/arm64/fp/za-fork.c +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -85,7 +85,7 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - ksft_test_result(fork_test(), "fork_test"); + ksft_test_result(fork_test(), "fork_test\n"); } else { ksft_print_msg("SME not supported\n"); diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 9dcd70911397..095b45531640 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -333,6 +333,9 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 + smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -356,6 +359,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index d63286397638..b5c81e81a379 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -267,6 +267,8 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -287,6 +289,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..49a29dbc1910 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -50,7 +50,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index f75f84d0b3d7..7c2241fae19a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -524,6 +524,37 @@ out: test_sockmap_pass_prog__destroy(pass); } +static void test_sockmap_unconnected_unix(void) +{ + int err, map, stream = 0, dgram = 0, zero = 0; + struct test_sockmap_pass_prog *skel; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + stream = xsocket(AF_UNIX, SOCK_STREAM, 0); + if (stream < 0) + return; + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + close(stream); + return; + } + + err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); + ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + + close(stream); + close(dgram); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -566,4 +597,7 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); + + if (test__start_subtest("sockmap unconnected af_unix")) + test_sockmap_unconnected_unix(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index a934d430c20c..a92807bfcd13 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1337,7 +1337,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int verd_mapfd, enum redir_mode mode) + int sock_mapfd, int nop_mapfd, + int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); unsigned int pass; @@ -1351,6 +1352,12 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, if (err) return; + if (nop_mapfd >= 0) { + err = add_to_sockmap(nop_mapfd, cli0, cli1); + if (err) + return; + } + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); @@ -1387,7 +1394,7 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, goto close0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1677,7 +1684,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1735,7 +1742,7 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1770,8 +1777,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void unix_inet_redir_to_connected(int family, int type, + int sock_mapfd, int nop_mapfd, + int verd_mapfd, + enum redir_mode mode) { int c0, c1, p0, p1; int sfd[2]; @@ -1785,7 +1794,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, + sock_mapfd, nop_mapfd, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1799,6 +1809,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, struct bpf_map *inner_map, int family) { int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int nop_map = bpf_map__fd(skel->maps.nop_map); int verdict_map = bpf_map__fd(skel->maps.verdict_map); int sock_map = bpf_map__fd(inner_map); int err; @@ -1808,14 +1819,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, return; skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, REDIR_EGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_EGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, + REDIR_EGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_EGRESS); skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index fc6b2954e8f5..59993fc9c0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> #include <test_progs.h> #include <network_helpers.h> +#include "tailcall_poke.skel.h" + /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1105,6 +1108,85 @@ out: bpf_object__close(tgt_obj); } +#define JMP_TABLE "/sys/fs/bpf/jmp_table" + +static int poke_thread_exit; + +static void *poke_update(void *arg) +{ + __u32 zero = 0, prog1_fd, prog2_fd, map_fd; + struct tailcall_poke *call = arg; + + map_fd = bpf_map__fd(call->maps.jmp_table); + prog1_fd = bpf_program__fd(call->progs.call1); + prog2_fd = bpf_program__fd(call->progs.call2); + + while (!poke_thread_exit) { + bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY); + bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY); + } + + return NULL; +} + +/* + * We are trying to hit prog array update during another program load + * that shares the same prog array map. + * + * For that we share the jmp_table map between two skeleton instances + * by pinning the jmp_table to same path. Then first skeleton instance + * periodically updates jmp_table in 'poke update' thread while we load + * the second skeleton instance in the main thread. + */ +static void test_tailcall_poke(void) +{ + struct tailcall_poke *call, *test; + int err, cnt = 10; + pthread_t thread; + + unlink(JMP_TABLE); + + call = tailcall_poke__open_and_load(); + if (!ASSERT_OK_PTR(call, "tailcall_poke__open")) + return; + + err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = pthread_create(&thread, NULL, poke_update, call); + if (!ASSERT_OK(err, "new toggler")) + goto out; + + while (cnt--) { + test = tailcall_poke__open(); + if (!ASSERT_OK_PTR(test, "tailcall_poke__open")) + break; + + err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) { + tailcall_poke__destroy(test); + break; + } + + bpf_program__set_autoload(test->progs.test, true); + bpf_program__set_autoload(test->progs.call1, false); + bpf_program__set_autoload(test->progs.call2, false); + + err = tailcall_poke__load(test); + tailcall_poke__destroy(test); + if (!ASSERT_OK(err, "tailcall_poke__load")) + break; + } + + poke_thread_exit = 1; + ASSERT_OK(pthread_join(thread, NULL), "pthread_join"); + +out: + bpf_map__unpin(call->maps.jmp_table, JMP_TABLE); + tailcall_poke__destroy(call); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1139,4 +1221,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_fentry_fexit(); if (test__start_subtest("tailcall_bpf2bpf_fentry_entry")) test_tailcall_bpf2bpf_fentry_entry(); + if (test__start_subtest("tailcall_poke")) + test_tailcall_poke(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 6ee22c3b251a..518f143c5b0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -24,6 +24,7 @@ #include "test_progs.h" #include "network_helpers.h" +#include "netlink_helpers.h" #include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh.skel.h" #include "test_tc_peer.skel.h" @@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb) } } +enum dev_mode { + MODE_VETH, + MODE_NETKIT, +}; + struct netns_setup_result { - int ifindex_veth_src; - int ifindex_veth_src_fwd; - int ifindex_veth_dst; - int ifindex_veth_dst_fwd; + enum dev_mode dev_mode; + int ifindex_src; + int ifindex_src_fwd; + int ifindex_dst; + int ifindex_dst_fwd; }; static int get_ifaddr(const char *name, char *ifaddr) @@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } +static int create_netkit(int mode, char *prim, char *peer) +{ + struct rtattr *linkinfo, *data, *peer_info; + struct rtnl_handle rth = { .fd = -1 }; + const char *type = "netkit"; + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; + } req = {}; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); + req.n.nlmsg_len += sizeof(struct ifinfomsg); + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); + addattr_nest_end(&req.n, peer_info); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; - char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - - SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); - SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); + char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + int err; - SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set veth_dst address " MAC_DST); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip link add src type veth peer name src_fwd"); + SYS(fail, "ip link add dst type veth peer name dst_fwd"); + + SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set dst address " MAC_DST); + } else if (result->dev_mode == MODE_NETKIT) { + err = create_netkit(NETKIT_L3, "src", "src_fwd"); + if (!ASSERT_OK(err, "create_ifindex_src")) + goto fail; + err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); + if (!ASSERT_OK(err, "create_ifindex_dst")) + goto fail; + } - if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; - result->ifindex_veth_src = if_nametoindex("veth_src"); - if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + result->ifindex_src = if_nametoindex("src"); + if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; - result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); - if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) + result->ifindex_src_fwd = if_nametoindex("src_fwd"); + if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd")) goto fail; - result->ifindex_veth_dst = if_nametoindex("veth_dst"); - if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + result->ifindex_dst = if_nametoindex("dst"); + if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) goto fail; - result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); - if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) + result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); + if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) goto fail; - SYS(fail, "ip link set veth_src netns " NS_SRC); - SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set src netns " NS_SRC); + SYS(fail, "ip link set src_fwd netns " NS_FWD); + SYS(fail, "ip link set dst_fwd netns " NS_FWD); + SYS(fail, "ip link set dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); - SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS(fail, "ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); + SYS(fail, "ip link set dev src up"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); - SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); - SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s", + src_fwd_addr); + SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", + src_fwd_addr); + } close_netns(nstoken); @@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. */ - SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS(fail, "ip link set dev veth_src_fwd up"); - SYS(fail, "ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); + SYS(fail, "ip link set dev src_fwd up"); + SYS(fail, "ip link set dev dst_fwd up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); close_netns(nstoken); @@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); - SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS(fail, "ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); + SYS(fail, "ip link set dev dst up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); - SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); + } close_netns(nstoken); @@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog, const struct bpf_program *chk_prog, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); int err; - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); - /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); - /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -539,10 +600,10 @@ done: static int netns_load_dtime_bpf(struct test_tc_dtime *skel, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); struct nstoken *nstoken; int err; @@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel, nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - /* tc qdisc add dev veth_src clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); - /* tc filter add dev veth_src ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_src egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev src clsact */ + QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); + /* tc filter add dev src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - /* tc qdisc add dev veth_dst clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); - /* tc filter add dev veth_dst ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_dst egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); + /* tc filter add dev dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); return 0; @@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) return; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_dtime__load(skel); if (!ASSERT_OK(err, "test_tc_dtime__load")) @@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_neigh__load(skel); if (!ASSERT_OK(err, "test_tc_neigh__load")) @@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) goto fail; skel->rodata->IFINDEX_SRC = ifindex; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets - * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + * and "tc_chk" on dst_fwd to drop non-redirected packets. */ /* tc qdisc add dev tun_fwd clsact */ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); - /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); - SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; @@ -1106,9 +1167,9 @@ fail: close_netns(nstoken); } -#define RUN_TEST(name) \ +#define RUN_TEST(name, mode) \ ({ \ - struct netns_setup_result setup_result; \ + struct netns_setup_result setup_result = { .dev_mode = mode, }; \ if (test__start_subtest(#name)) \ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ @@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg) { netns_setup_namespaces_nofail("delete"); - RUN_TEST(tc_redirect_peer); - RUN_TEST(tc_redirect_peer_l3); - RUN_TEST(tc_redirect_neigh); - RUN_TEST(tc_redirect_neigh_fib); - RUN_TEST(tc_redirect_dtime); + RUN_TEST(tc_redirect_peer, MODE_VETH); + RUN_TEST(tc_redirect_peer, MODE_NETKIT); + RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); + RUN_TEST(tc_redirect_neigh, MODE_VETH); + RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); + RUN_TEST(tc_redirect_dtime, MODE_VETH); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e5c61aa6604a..5cfa7a6316b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_helper_restricted.skel.h" #include "verifier_helper_value_access.skel.h" #include "verifier_int_ptr.skel.h" +#include "verifier_iterating_callbacks.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" @@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } +void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 4ce76eb064c4..d461746fd3c1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) return 0; } +static int outer_loop(__u32 index, void *data) +{ + bpf_loop(nr_loops, empty_callback, NULL, 0); + __sync_add_and_fetch(&hits, nr_loops); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { - for (int i = 0; i < 1000; i++) { - bpf_loop(nr_loops, empty_callback, NULL, 0); - - __sync_add_and_fetch(&hits, nr_loops); - } + bpf_loop(1000, outer_loop, NULL, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87..56c764df8196 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 4c39e920dac2..8c0ef2742208 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) return 0; bpf_spin_lock(&lock); bpf_rbtree_add(&rbtree, &f->node, rbless); + bpf_spin_unlock(&lock); return 0; } @@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) if (!f) return 0; bpf_loop(5, subprog_cb_ref, NULL, 0); + bpf_obj_drop(f); return 0; } diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e02cfd380746..40df2cc26eaf 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -24,9 +24,11 @@ struct task_struct {}; #define STACK_TABLE_EPOCH_SHIFT 20 #define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_CFGS 32 +#define READ_MAP_VAR_PAYLOAD_CAP \ + ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) #define STROBE_MAX_PAYLOAD \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ - STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) struct strobe_value_header { /* @@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, struct strobe_value_generic *value, struct strobemeta_payload *data, - void *payload) + size_t off) { void *location; uint64_t len; @@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); /* * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is @@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; data->str_lens[idx] = len; - return len; + return off + len; } -static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, - void *payload) +static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + size_t off) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, location = calc_location(&cfg->map_locs[idx], tls_base); if (!location) - return payload; + return off; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) - return payload; + return off; descr->id = map.id; descr->cnt = map.cnt; @@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; - payload += len; + off += len; } #ifdef NO_UNROLL @@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, break; descr->key_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; - payload += len; + off += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; - payload += len; + off += len; } } - return payload; + return off; } #ifdef USE_BPF_LOOP @@ -455,14 +457,20 @@ struct read_var_ctx { struct strobemeta_payload *data; void *tls_base; struct strobemeta_cfg *cfg; - void *payload; + size_t payload_off; /* value gets mutated */ struct strobe_value_generic *value; enum read_type type; }; -static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +static int read_var_callback(__u64 index, struct read_var_ctx *ctx) { + /* lose precision info for ctx->payload_off, verifier won't track + * double xor, barrier_var() is needed to force clang keep both xors. + */ + ctx->payload_off ^= index; + barrier_var(ctx->payload_off); + ctx->payload_off ^= index; switch (ctx->type) { case READ_INT_VAR: if (index >= STROBE_MAX_INTS) @@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) case READ_MAP_VAR: if (index >= STROBE_MAX_MAPS) return 1; - ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP) + return 1; + ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; case READ_STR_VAR: if (index >= STROBE_MAX_STRS) return 1; - ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN) + return 1; + ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; } return 0; @@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; - void *tls_base, *payload; + size_t payload_off; + void *tls_base; cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); if (!cfg) @@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, data->int_vals_set_mask = 0; data->req_meta_valid = 0; - payload = data->payload; + payload_off = 0; /* * we don't have struct task_struct definition, it should be: * tls_base = (void *)task->thread.fsbase; @@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, .tls_base = tls_base, .value = &value, .data = data, - .payload = payload, + .payload_off = 0, }; int err; @@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); if (err != STROBE_MAX_MAPS) return NULL; + + payload_off = ctx.payload_off; + /* this should not really happen, here only to satisfy verifer */ + if (payload_off > sizeof(data->payload)) + payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { - payload += read_str_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { - payload = read_map_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); } #endif /* USE_BPF_LOOP */ @@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent */ - return payload; + return &data->payload[payload_off]; } SEC("raw_tracepoint/kfree_skb") diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c new file mode 100644 index 000000000000..c78b94b75e83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test, int a) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call1, int a) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call2, int a) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 464d35bd57c7..b7250eb9c30c 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -15,6 +15,13 @@ struct { } sock_map SEC(".maps"); struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} nop_map SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); __uint(max_entries, 2); __type(key, __u32); diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c index df7697b94007..c1f55e1d80a4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cfg.c +++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c @@ -97,4 +97,66 @@ l0_%=: r2 = r0; \ " ::: __clobber_all); } +SEC("socket") +__description("conditional loop (2)") +__success +__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11") +__naked void conditional_loop2(void) +{ + asm volatile (" \ + r9 = 2 ll; \ + r3 = 0x20 ll; \ + r4 = 0x35 ll; \ + r8 = r4; \ + goto l1_%=; \ +l0_%=: r9 -= r3; \ + r9 -= r4; \ + r9 -= r8; \ +l1_%=: r8 += r4; \ + if r8 < 0x64 goto l0_%=; \ + r0 = r9; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2") +__naked void uncond_loop_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 = 1; \ + goto l0_%=; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + + +__naked __noinline __used +static unsigned long never_ending_subprog() +{ + asm volatile (" \ + r0 = r1; \ + goto -1; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +/* infinite loop is detected *after* check_cfg() */ +__failure __msg("infinite loop detected") +__naked void uncond_loop_in_subprog_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 += 1; \ + call never_ending_subprog; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c new file mode 100644 index 000000000000..5905e036e0ea --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 8); +} ringbuf SEC(".maps"); + +struct vm_area_struct; +struct bpf_map; + +struct buf_context { + char *buf; +}; + +struct num_context { + __u64 i; + __u64 j; +}; + +__u8 choice_arr[2] = { 0, 1 }; + +static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) +{ + if (idx == 0) { + ctx->buf = (char *)(0xDEAD); + return 0; + } + + if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) + return 1; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=fp") +int unsafe_on_2nd_iter(void *unused) +{ + char buf[4]; + struct buf_context loop_ctx = { .buf = buf }; + + bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); + return 0; +} + +static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i = 0; + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_on_zero_iter(void *unused) +{ + struct num_context loop_ctx = { .i = 32 }; + + bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int widening_cb(__u32 idx, struct num_context *ctx) +{ + ++ctx->i; + return 0; +} + +SEC("?raw_tp") +__success +int widening(void *unused) +{ + struct num_context loop_ctx = { .i = 0, .j = 1 }; + + bpf_loop(100, widening_cb, &loop_ctx, 0); + /* loop_ctx.j is not changed during callback iteration, + * verifier should not apply widening to it. + */ + return choice_arr[loop_ctx.j]; +} + +static int loop_detection_cb(__u32 idx, struct num_context *ctx) +{ + for (;;) {} + return 0; +} + +SEC("?raw_tp") +__failure __msg("infinite loop detected") +int loop_detection(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_loop(100, loop_detection_cb, &loop_ctx, 0); + return 0; +} + +static __always_inline __u64 oob_state_machine(struct num_context *ctx) +{ + switch (ctx->i) { + case 0: + ctx->i = 1; + break; + case 1: + ctx->i = 32; + break; + } + return 0; +} + +static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_for_each_map_elem(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_ringbuf_drain(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_find_vma(void *unused) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct num_context loop_ctx = { .i = 0 }; + + bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int iter_limit_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i++; + return 0; +} + +SEC("?raw_tp") +__success +int bpf_loop_iter_limit_ok(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(1, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=2 size=1") +int bpf_loop_iter_limit_overflow(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(2, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 100; + return 0; +} + +static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 10; + return 0; +} + +static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 1; + bpf_loop(1, iter_limit_level2a_cb, ctx, 0); + bpf_loop(1, iter_limit_level2b_cb, ctx, 0); + return 0; +} + +/* Check that path visiting every callback function once had been + * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, + * with each decimal digit corresponding to a callback visit marker. + */ +SEC("socket") +__success __retval(111111) +int bpf_loop_iter_limit_nested(void *unused) +{ + struct num_context ctx1 = { .i = 0 }; + struct num_context ctx2 = { .i = 0 }; + __u64 a, b, c; + + bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); + bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); + a = ctx1.i; + b = ctx2.i; + /* Force 'ctx1.i' and 'ctx2.i' precise. */ + c = choice_arr[(a + b) % 2]; + /* This makes 'c' zero, but neither clang nor verifier know it. */ + c /= 10; + /* Make sure that verifier does not visit 'impossible' states: + * enumerate all possible callback visit masks. + */ + if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && + b != 0 && b != 1 && b != 11 && b != 101 && b != 111) + asm volatile ("r0 /= 0;" ::: "r0"); + return 1000 * a + b + c; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 5bc86af80a9a..71735dbf33d4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -75,9 +75,10 @@ l0_%=: r0 += 1; \ " ::: __clobber_all); } -SEC("tracepoint") +SEC("socket") __description("bounded loop, start in the middle") -__failure __msg("back-edge") +__success +__failure_unpriv __msg_unpriv("back-edge") __naked void loop_start_in_the_middle(void) { asm volatile (" \ @@ -136,7 +137,9 @@ l0_%=: exit; \ SEC("tracepoint") __description("bounded recursion") -__failure __msg("back-edge") +__failure +/* verifier limitation in detecting max stack depth */ +__msg("the call stack of 8 frames is too deep !") __naked void bounded_recursion(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 193c0f8272d0..6b564d4c0986 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void) } #endif /* v4 instruction */ + +SEC("?raw_tp") +__success __log_level(2) +/* + * Without the bug fix there will be no history between "last_idx 3 first_idx 3" + * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor + * expected log messages to the one specific mark_chain_precision operation. + * + * This is quite fragile: if verifier checkpointing heuristic changes, this + * might need adjusting. + */ +__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") +__msg("3: R0_w=6") +__naked int state_loop_first_last_equal(void) +{ + asm volatile ( + "r0 = 0;" + "l0_%=:" + "r0 += 1;" + "r0 += 1;" + /* every few iterations we'll have a checkpoint here with + * first_idx == last_idx, potentially confusing precision + * backtracking logic + */ + "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */ + "goto l0_%=;" + "l1_%=:" + "exit;" + ::: __clobber_common + ); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b..f61d623b1ce8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ +__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +__msg("frame 0: propagating r1,r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r1, r4 are always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r1, r4 are always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index e959336c7a73..80f620602d50 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -53,6 +53,8 @@ #define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 +#define MAX_PACKET_OFF 0xffff + #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) @@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void) } struct tcpopt_context { - __u8 *ptr; - __u8 *end; + void *data; void *data_end; __be32 *tsecr; __u8 wscale; bool option_timestamp; bool option_sack; + __u32 off; }; -static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) { - __u8 opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr >= ctx->end) - return 1; - if (ctx->ptr >= ctx->data_end) - return 1; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; - opcode = ctx->ptr[0]; + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - if (opcode == TCPOPT_EOL) - return 1; - if (opcode == TCPOPT_NOP) { - ++ctx->ptr; - return 0; - } + ctx->off += sz; + return data; +} - if (ctx->ptr + 1 >= ctx->end) - return 1; - if (ctx->ptr + 1 >= ctx->data_end) +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; + + opcode = next(ctx, 1); + if (!opcode) return 1; - opsize = ctx->ptr[1]; - if (opsize < 2) + + if (*opcode == TCPOPT_EOL) return 1; + if (*opcode == TCPOPT_NOP) + return 0; - if (ctx->ptr + opsize > ctx->end) + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) return 1; - switch (opcode) { + switch (*opcode) { case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) - ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { ctx->option_timestamp = true; /* Client's tsval becomes our tsecr. */ - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + *ctx->tsecr = get_unaligned((__be32 *)tsecr); } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM) + if (*opsize == TCPOLEN_SACK_PERM) ctx->option_sack = true; break; } - ctx->ptr += opsize; + ctx->off = off + *opsize; return 0; } @@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context) static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data_end) + __be32 *tsecr, void *data, void *data_end) { struct tcpopt_context loop_ctx = { - .ptr = (__u8 *)(tcp_header + 1), - .end = (__u8 *)tcp_header + tcp_len, + .data = data, .data_end = data_end, .tsecr = tsecr, .wscale = TS_OPT_WSCALE_MASK, .option_timestamp = false, .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. + * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. + */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, }; u32 cookie; @@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, cookie = (__u32)value; if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data_end)) + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) tsopt = tsopt_buf; /* Check that there is enough space for a SYNACK. It also covers diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 1bdf2b43e49e..3d5cd51071f0 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -442,7 +442,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -799,7 +799,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -811,7 +811,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index f9297900cea6..78f19c255f20 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -9,8 +9,8 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { @@ -23,8 +23,8 @@ BPF_LD_IMM64(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 591ca9637b23..b604c570309a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) struct xdp_info *meta = data - sizeof(struct xdp_info); if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->pkt_nb, meta->count); + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); return false; } @@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp if (addr >= umem->num_frames * umem->frame_size || addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } @@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); return TEST_FAILURE; } @@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) } if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a6e9848189d6..b5eb1be2248c 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -147,71 +147,6 @@ test_add_proc() } # -# Testing the new "isolated" partition root type -# -test_isolated() -{ - cd $CGROUP2/test - echo 2-3 > cpuset.cpus - TYPE=$(cat cpuset.cpus.partition) - [[ $TYPE = member ]] || echo member > cpuset.cpus.partition - - console_msg "Change from member to root" - test_partition root - - console_msg "Change from root to isolated" - test_partition isolated - - console_msg "Change from isolated to member" - test_partition member - - console_msg "Change from member to isolated" - test_partition isolated - - console_msg "Change from isolated to root" - test_partition root - - console_msg "Change from root to member" - test_partition member - - # - # Testing partition root with no cpu - # - console_msg "Distribute all cpus to child partition" - echo +cpuset > cgroup.subtree_control - test_partition root - - mkdir A1 - cd A1 - echo 2-3 > cpuset.cpus - test_partition root - test_effective_cpus 2-3 - cd .. - test_effective_cpus "" - - console_msg "Moving task to partition test" - test_add_proc "No space left" - cd A1 - test_add_proc "" - cd .. - - console_msg "Shrink and expand child partition" - cd A1 - echo 2 > cpuset.cpus - cd .. - test_effective_cpus 3 - cd A1 - echo 2-3 > cpuset.cpus - cd .. - test_effective_cpus "" - - # Cleaning up - console_msg "Cleaning up" - echo $$ > $CGROUP2/cgroup.procs - [[ -d A1 ]] && rmdir A1 -} - -# # Cpuset controller state transition test matrix. # # Cgroup test hierarchy @@ -297,14 +232,14 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4" + A1:P0,A2:P2,A3:P1 2-4,2-3" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1 ." + A1:P0,A2:P-2,A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ A1:P0,A2:P2,A3:P-1 2-4" @@ -313,7 +248,7 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," # An invalidated remote partition cannot self-recover from hotplug " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" @@ -347,10 +282,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" @@ -359,13 +294,13 @@ TEST_MATRIX=( A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -441,7 +376,7 @@ write_cpu_online() } fi echo $VAL > $CPUFILE - pause 0.01 + pause 0.05 } # @@ -573,12 +508,14 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions + ISCPUS=$DIR/cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)" [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)" + [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)" done } @@ -656,11 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and compare that with the expected value. +# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file, +# if available, and compare that with the expected value. # -# Note that a sched domain of just 1 CPU will be considered isolated. +# Note that isolated CPUs from the sched/domains context include offline +# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may +# not be included in the cpuset.cpus.isolated control file which contains +# only CPUs in isolated partitions. # -# $1 - expected isolated cpu list +# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>} +# <isolcpus1> - expected sched/domains value +# <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined # check_isolcpus() { @@ -668,8 +611,38 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + if [[ $EXPECT_VAL = . ]] + then + EXPECT_VAL= + EXPECT_VAL2= + elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + then + set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + EXPECT_VAL=$1 + EXPECT_VAL2=$2 + else + EXPECT_VAL2=$EXPECT_VAL + fi + + # + # Check the debug isolated cpumask, if present + # + [[ -f $ISCPUS ]] && { + ISOLCPUS=$(cat $ISCPUS) + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + # Take a 50ms pause and try again + pause 0.05 + ISOLCPUS=$(cat $ISCPUS) + } + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + ISOLCPUS= + } + + # + # Use the sched domain in debugfs to check isolated CPUs, if available + # [[ -d $SCHED_DOMAINS ]] || return 0 - [[ $EXPECT_VAL = . ]] && EXPECT_VAL= for ((CPU=0; CPU < $NR_CPUS; CPU++)) do @@ -714,6 +687,26 @@ test_fail() } # +# Check to see if there are unexpected isolated CPUs left +# +null_isolcpus_check() +{ + [[ $VERBOSE -gt 0 ]] || return 0 + # Retry a few times before printing error + RETRY=0 + while [[ $RETRY -lt 5 ]] + do + pause 0.01 + check_isolcpus "." + [[ $? -eq 0 ]] && return 0 + ((RETRY++)) + done + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -787,7 +780,7 @@ run_state_test() # NEWLIST=$(cat cpuset.cpus.effective) RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]] + while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times pause 0.01 @@ -798,6 +791,7 @@ run_state_test() echo "Effective cpus changed to $NEWLIST after test $I!" exit 1 } + null_isolcpus_check [[ $VERBOSE -gt 0 ]] && echo "Test $I done." ((I++)) done @@ -805,6 +799,72 @@ run_state_test() } # +# Testing the new "isolated" partition root type +# +test_isolated() +{ + cd $CGROUP2/test + echo 2-3 > cpuset.cpus + TYPE=$(cat cpuset.cpus.partition) + [[ $TYPE = member ]] || echo member > cpuset.cpus.partition + + console_msg "Change from member to root" + test_partition root + + console_msg "Change from root to isolated" + test_partition isolated + + console_msg "Change from isolated to member" + test_partition member + + console_msg "Change from member to isolated" + test_partition isolated + + console_msg "Change from isolated to root" + test_partition root + + console_msg "Change from root to member" + test_partition member + + # + # Testing partition root with no cpu + # + console_msg "Distribute all cpus to child partition" + echo +cpuset > cgroup.subtree_control + test_partition root + + mkdir A1 + cd A1 + echo 2-3 > cpuset.cpus + test_partition root + test_effective_cpus 2-3 + cd .. + test_effective_cpus "" + + console_msg "Moving task to partition test" + test_add_proc "No space left" + cd A1 + test_add_proc "" + cd .. + + console_msg "Shrink and expand child partition" + cd A1 + echo 2 > cpuset.cpus + cd .. + test_effective_cpus 3 + cd A1 + echo 2-3 > cpuset.cpus + cd .. + test_effective_cpus "" + + # Cleaning up + console_msg "Cleaning up" + echo $$ > $CGROUP2/cgroup.procs + [[ -d A1 ]] && rmdir A1 + null_isolcpus_check +} + +# # Wait for inotify event for the given file and read it # $1: cgroup file to wait for # $2: file to store the read result diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index ff519029f6f4..8845353aca53 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -740,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root) /* * cg_check_frozen(cgroup, true) will fail here, - * because the task in in the TRACEd state. + * because the task is in the TRACEd state. */ if (cg_freeze_wait(cgroup, false)) goto cleanup; diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index c99d2adaca3f..47fdaa146443 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -50,9 +50,9 @@ static int get_zswap_stored_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/stored_pages", value); } -static int get_zswap_written_back_pages(size_t *value) +static int get_cg_wb_count(const char *cg) { - return read_int("/sys/kernel/debug/zswap/written_back_pages", value); + return cg_read_key_long(cg, "memory.stat", "zswp_wb"); } static long get_zswpout(const char *cgroup) @@ -73,6 +73,24 @@ static int allocate_bytes(const char *cgroup, void *arg) return 0; } +static char *setup_test_group_1M(const char *root, const char *name) +{ + char *group_name = cg_name(root, name); + + if (!group_name) + return NULL; + if (cg_create(group_name)) + goto fail; + if (cg_write(group_name, "memory.max", "1M")) { + cg_destroy(group_name); + goto fail; + } + return group_name; +fail: + free(group_name); + return NULL; +} + /* * Sanity test to check that pages are written into zswap. */ @@ -117,43 +135,51 @@ out: /* * When trying to store a memcg page in zswap, if the memcg hits its memory - * limit in zswap, writeback should not be triggered. - * - * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may - * not zswap"). Needs to be revised when a per memcg writeback mechanism is - * implemented. + * limit in zswap, writeback should affect only the zswapped pages of that + * memcg. */ static int test_no_invasive_cgroup_shrink(const char *root) { - size_t written_back_before, written_back_after; int ret = KSFT_FAIL; - char *test_group; + size_t control_allocation_size = MB(10); + char *control_allocation, *wb_group = NULL, *control_group = NULL; /* Set up */ - test_group = cg_name(root, "no_shrink_test"); - if (!test_group) - goto out; - if (cg_create(test_group)) + wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); + if (!wb_group) + return KSFT_FAIL; + if (cg_write(wb_group, "memory.zswap.max", "10K")) goto out; - if (cg_write(test_group, "memory.max", "1M")) + control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); + if (!control_group) goto out; - if (cg_write(test_group, "memory.zswap.max", "10K")) + + /* Push some test_group2 memory into zswap */ + if (cg_enter_current(control_group)) goto out; - if (get_zswap_written_back_pages(&written_back_before)) + control_allocation = malloc(control_allocation_size); + for (int i = 0; i < control_allocation_size; i += 4095) + control_allocation[i] = 'a'; + if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) goto out; - /* Allocate 10x memory.max to push memory into zswap */ - if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ + if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) goto out; - /* Verify that no writeback happened because of the memcg allocation */ - if (get_zswap_written_back_pages(&written_back_after)) - goto out; - if (written_back_after == written_back_before) + /* Verify that only zswapped memory from gwb_group has been written back */ + if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) ret = KSFT_PASS; out: - cg_destroy(test_group); - free(test_group); + cg_enter_current(root); + if (control_group) { + cg_destroy(control_group); + free(control_group); + } + cg_destroy(wb_group); + free(wb_group); + if (control_allocation) + free(control_allocation); return ret; } diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index b71247ba7196..8a1cc2bf1864 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -2,6 +2,7 @@ # Makefile for damon selftests TEST_GEN_FILES += huge_count_read_write +TEST_GEN_FILES += access_memory TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh @@ -9,6 +10,8 @@ TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py new file mode 100644 index 000000000000..e98cf4b6a4b7 --- /dev/null +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -0,0 +1,322 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os + +sysfs_root = '/sys/kernel/mm/damon/admin' + +def write_file(path, string): + "Returns error string if failed, or None otherwise" + string = '%s' % string + try: + with open(path, 'w') as f: + f.write(string) + except Exception as e: + return '%s' % e + return None + +def read_file(path): + '''Returns the read content and error string. The read content is None if + the reading failed''' + try: + with open(path, 'r') as f: + return f.read(), None + except Exception as e: + return None, '%s' % e + +class DamosAccessPattern: + size = None + nr_accesses = None + age = None + scheme = None + + def __init__(self, size=None, nr_accesses=None, age=None): + self.size = size + self.nr_accesses = nr_accesses + self.age = age + + if self.size == None: + self.size = [0, 2**64 - 1] + if self.nr_accesses == None: + self.nr_accesses = [0, 2**64 - 1] + if self.age == None: + self.age = [0, 2**64 - 1] + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'), + self.nr_accesses[0]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'), + self.nr_accesses[1]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1]) + if err != None: + return err + +class Damos: + action = None + access_pattern = None + # todo: Support quotas, watermarks, stats, tried_regions + idx = None + context = None + tried_bytes = None + + def __init__(self, action='stat', access_pattern=DamosAccessPattern()): + self.action = action + self.access_pattern = access_pattern + self.access_pattern.scheme = self + + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'schemes', '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action) + if err != None: + return err + err = self.access_pattern.stage() + if err != None: + return err + + # disable quotas + err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0') + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0') + if err != None: + return err + + # disable watermarks + err = write_file( + os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + if err != None: + return err + + # disable filters + err = write_file( + os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + if err != None: + return err + +class DamonTarget: + pid = None + # todo: Support target regions if test is made + idx = None + context = None + + def __init__(self, pid): + self.pid = pid + + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'targets', '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0') + if err != None: + return err + return write_file( + os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) + +class DamonAttrs: + sample_us = None + aggr_us = None + update_us = None + min_nr_regions = None + max_nr_regions = None + context = None + + def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + min_nr_regions=10, max_nr_regions=1000): + self.sample_us = sample_us + self.aggr_us = aggr_us + self.update_us = update_us + self.min_nr_regions = min_nr_regions + self.max_nr_regions = max_nr_regions + + def interval_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'intervals') + + def nr_regions_range_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'nr_regions') + + def stage(self): + err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'), + self.sample_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), + self.aggr_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), + self.update_us) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'min'), + self.min_nr_regions) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'max'), + self.max_nr_regions) + if err != None: + return err + +class DamonCtx: + ops = None + monitoring_attrs = None + targets = None + schemes = None + kdamond = None + idx = None + + def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[], + schemes=[]): + self.ops = ops + self.monitoring_attrs = monitoring_attrs + self.monitoring_attrs.context = self + + self.targets = targets + for idx, target in enumerate(self.targets): + target.idx = idx + target.context = self + + self.schemes = schemes + for idx, scheme in enumerate(self.schemes): + scheme.idx = idx + scheme.context = self + + def sysfs_dir(self): + return os.path.join(self.kdamond.sysfs_dir(), 'contexts', + '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'operations'), self.ops) + if err != None: + return err + err = self.monitoring_attrs.stage() + if err != None: + return err + + nr_targets_file = os.path.join( + self.sysfs_dir(), 'targets', 'nr_targets') + content, err = read_file(nr_targets_file) + if err != None: + return err + if int(content) != len(self.targets): + err = write_file(nr_targets_file, '%d' % len(self.targets)) + if err != None: + return err + for target in self.targets: + err = target.stage() + if err != None: + return err + + nr_schemes_file = os.path.join( + self.sysfs_dir(), 'schemes', 'nr_schemes') + content, err = read_file(nr_schemes_file) + if int(content) != len(self.schemes): + err = write_file(nr_schemes_file, '%d' % len(self.schemes)) + if err != None: + return err + for scheme in self.schemes: + err = scheme.stage() + if err != None: + return err + return None + +class Kdamond: + state = None + pid = None + contexts = None + idx = None # index of this kdamond between siblings + kdamonds = None # parent + + def __init__(self, contexts=[]): + self.contexts = contexts + for idx, context in enumerate(self.contexts): + context.idx = idx + context.kdamond = self + + def sysfs_dir(self): + return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx) + + def start(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err != None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err != None: + return err + + for context in self.contexts: + err = context.stage() + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + return err + + def update_schemes_tried_bytes(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_bytes') + if err != None: + return err + for context in self.contexts: + for scheme in context.schemes: + content, err = read_file(os.path.join(scheme.sysfs_dir(), + 'tried_regions', 'total_bytes')) + if err != None: + return err + scheme.tried_bytes = int(content) + +class Kdamonds: + kdamonds = [] + + def __init__(self, kdamonds=[]): + self.kdamonds = kdamonds + for idx, kdamond in enumerate(self.kdamonds): + kdamond.idx = idx + kdamond.kdamonds = self + + def sysfs_dir(self): + return os.path.join(sysfs_root, 'kdamonds') + + def start(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'), + '%s' % len(self.kdamonds)) + if err != None: + return err + for kdamond in self.kdamonds: + err = kdamond.start() + if err != None: + return err + return None diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c new file mode 100644 index 000000000000..585a2fa54329 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for testing DAMON. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 4) { + printf("Usage: %s <number> <size (bytes)> <time (ms)>\n", + argv[0]); + return -1; + } + + nr_regions = atoi(argv[1]); + sz_region = atoi(argv[2]); + access_time_ms = atoi(argv[3]); + + regions = malloc(sizeof(*regions) * nr_regions); + for (i = 0; i < nr_regions; i++) + regions[i] = malloc(sz_region); + + for (i = 0; i < nr_regions; i++) { + start_clock = clock(); + while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC < + access_time_ms) + memset(regions[i], i, 1024 * 1024 * 10); + } + return 0; +} diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 56f0230a8b92..e9a976d296e2 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -150,6 +150,32 @@ test_weights() ensure_file "$weights_dir/age_permil" "exist" "600" } +test_goal() +{ + goal_dir=$1 + ensure_dir "$goal_dir" "exist" + ensure_file "$goal_dir/target_value" "exist" "600" + ensure_file "$goal_dir/current_value" "exist" "600" +} + +test_goals() +{ + goals_dir=$1 + ensure_dir "$goals_dir" "exist" + ensure_file "$goals_dir/nr_goals" "exist" "600" + + ensure_write_succ "$goals_dir/nr_goals" "1" "valid input" + test_goal "$goals_dir/0" + + ensure_write_succ "$goals_dir/nr_goals" "2" "valid input" + test_goal "$goals_dir/0" + test_goal "$goals_dir/1" + + ensure_write_succ "$goals_dir/nr_goals" "0" "valid input" + ensure_dir "$goals_dir/0" "not_exist" + ensure_dir "$goals_dir/1" "not_exist" +} + test_quotas() { quotas_dir=$1 @@ -158,6 +184,7 @@ test_quotas() ensure_file "$quotas_dir/bytes" "exist" 600 ensure_file "$quotas_dir/reset_interval_ms" "exist" 600 test_weights "$quotas_dir/weights" + test_goals "$quotas_dir/goals" } test_access_pattern() diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py new file mode 100644 index 000000000000..8c690ba1a573 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + proc = subprocess.Popen(['sleep', '2']) + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos( + access_pattern=_damon_sysfs.DamosAccessPattern( + nr_accesses=[200, 200]))] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err != None: + print('kdmaond start failed: %s' % err) + exit(1) + + while proc.poll() == None: + err = kdamonds.kdamonds[0].update_schemes_tried_bytes() + if err != None: + print('tried bytes update failed: %s' % err) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py new file mode 100644 index 000000000000..cdbf19b442c9 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + # access two 10 MiB memory regions, 2 second per each + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000']) + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos( + access_pattern=_damon_sysfs.DamosAccessPattern( + # >= 25% access rate, >= 200ms age + nr_accesses=[5, 20], age=[2, 2**64 - 1]))] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err != None: + print('kdmaond start failed: %s' % err) + exit(1) + + wss_collected = [] + while proc.poll() == None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_bytes() + if err != None: + print('tried bytes update failed: %s' % err) + exit(1) + + wss_collected.append( + kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes) + + wss_collected.sort() + acceptable_error_rate = 0.2 + for percentile in [50, 75]: + sample = wss_collected[int(len(wss_collected) * percentile / 100)] + error_rate = abs(sample - sz_region) / sz_region + print('%d-th percentile (%d) error %f' % + (percentile, sample, error_rate)) + if error_rate > acceptable_error_rate: + print('the error rate is not acceptable (> %f)' % + acceptable_error_rate) + print('samples are as below') + print('\n'.join(['%d' % wss for wss in wss_collected])) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh index 4917dbb35a44..5667febee328 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh @@ -30,16 +30,16 @@ ip netns exec server ip addr add ${server_ip4}/24 dev eth0 ip netns exec client ip link add dev bond0 down type bond mode 1 \ miimon 100 all_slaves_active 1 -ip netns exec client ip link set dev eth0 down master bond0 +ip netns exec client ip link set dev eth0 master bond0 ip netns exec client ip link set dev bond0 up ip netns exec client ip addr add ${client_ip4}/24 dev bond0 ip netns exec client ping -c 5 $server_ip4 >/dev/null -ip netns exec client ip link set dev eth0 down nomaster +ip netns exec client ip link set dev eth0 nomaster ip netns exec client ip link set dev bond0 down ip netns exec client ip link set dev bond0 type bond mode 0 \ arp_interval 1000 arp_ip_target "+${server_ip4}" -ip netns exec client ip link set dev eth0 down master bond0 +ip netns exec client ip link set dev eth0 master bond0 ip netns exec client ip link set dev bond0 up ip netns exec client ping -c 5 $server_ip4 >/dev/null diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore new file mode 100644 index 000000000000..52ae618fdd98 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +dev_in_maps diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile new file mode 100644 index 000000000000..56b2b48a765b --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := dev_in_maps + +CFLAGS := -Wall -Werror + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c new file mode 100644 index 000000000000..e19ab0e85709 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <inttypes.h> +#include <unistd.h> +#include <stdio.h> + +#include <linux/unistd.h> +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sched.h> +#include <fcntl.h> + +#include "../../kselftest.h" +#include "log.h" + +static int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +static int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); +} + +static long get_file_dev_and_inode(void *addr, struct statx *stx) +{ + char buf[4096]; + FILE *mapf; + + mapf = fopen("/proc/self/maps", "r"); + if (mapf == NULL) + return pr_perror("fopen(/proc/self/maps)"); + + while (fgets(buf, sizeof(buf), mapf)) { + unsigned long start, end; + uint32_t maj, min; + __u64 ino; + + if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu", + &start, &end, &maj, &min, &ino) != 5) + return pr_perror("unable to parse: %s", buf); + if (start == (unsigned long)addr) { + stx->stx_dev_major = maj; + stx->stx_dev_minor = min; + stx->stx_ino = ino; + return 0; + } + } + + return pr_err("unable to find the mapping"); +} + +static int ovl_mount(void) +{ + int tmpfs, fsfd, ovl; + + fsfd = sys_fsopen("tmpfs", 0); + if (fsfd == -1) + return pr_perror("fsopen(tmpfs)"); + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) + return pr_perror("FSCONFIG_CMD_CREATE"); + + tmpfs = sys_fsmount(fsfd, 0, 0); + if (tmpfs == -1) + return pr_perror("fsmount"); + + close(fsfd); + + /* overlayfs can't be constructed on top of a detached mount. */ + if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)) + return pr_perror("move_mount"); + close(tmpfs); + + if (mkdir("/tmp/w", 0755) == -1 || + mkdir("/tmp/u", 0755) == -1 || + mkdir("/tmp/l", 0755) == -1) + return pr_perror("mkdir"); + + fsfd = sys_fsopen("overlay", 0); + if (fsfd == -1) + return pr_perror("fsopen(overlay)"); + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1) + return pr_perror("fsconfig"); + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) + return pr_perror("fsconfig"); + ovl = sys_fsmount(fsfd, 0, 0); + if (ovl == -1) + return pr_perror("fsmount"); + + return ovl; +} + +/* + * Check that the file device and inode shown in /proc/pid/maps match values + * returned by stat(2). + */ +static int test(void) +{ + struct statx stx, mstx; + int ovl, fd; + void *addr; + + ovl = ovl_mount(); + if (ovl == -1) + return -1; + + fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644); + if (fd == -1) + return pr_perror("openat"); + + addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + return pr_perror("mmap"); + + if (get_file_dev_and_inode(addr, &mstx)) + return -1; + if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx)) + return pr_perror("statx"); + + if (stx.stx_dev_major != mstx.stx_dev_major || + stx.stx_dev_minor != mstx.stx_dev_minor || + stx.stx_ino != mstx.stx_ino) + return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n", + mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino, + stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino); + + ksft_test_result_pass("devices are matched\n"); + return 0; +} + +int main(int argc, char **argv) +{ + int fsfd; + + fsfd = sys_fsopen("overlay", 0); + if (fsfd == -1) { + ksft_test_result_skip("unable to create overlay mount\n"); + return 1; + } + close(fsfd); + + /* Create a new mount namespace to not care about cleaning test mounts. */ + if (unshare(CLONE_NEWNS) == -1) { + ksft_test_result_skip("unable to create a new mount namespace\n"); + return 1; + } + + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) { + pr_perror("mount"); + return 1; + } + + ksft_set_plan(1); + + if (test()) + return 1; + + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/filesystems/overlayfs/log.h b/tools/testing/selftests/filesystems/overlayfs/log.h new file mode 100644 index 000000000000..db64df2a8483 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/log.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTEST_TIMENS_LOG_H__ +#define __SELFTEST_TIMENS_LOG_H__ + +#define pr_msg(fmt, lvl, ...) \ + ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \ + lvl, __FILE__, __LINE__, ##__VA_ARGS__) + +#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__) + +#define pr_err(fmt, ...) \ + ({ \ + ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_fail(fmt, ...) \ + ({ \ + ksft_test_result_fail(fmt, ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_perror(fmt, ...) pr_p(pr_err, fmt, ##__VA_ARGS__) + +#endif diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore new file mode 100644 index 000000000000..82a4846cbc4b --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*_test diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile new file mode 100644 index 000000000000..07a0d5b545ca --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +TEST_GEN_PROGS := statmount_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c new file mode 100644 index 000000000000..3eafd7da58e2 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include <assert.h> +#include <stdint.h> +#include <sched.h> +#include <fcntl.h> +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <asm/unistd.h> + +#include "../../kselftest.h" + +static const char *const known_fs[] = { + "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs", + "autofs", "bcachefs", "bdev", "befs", "bfs", "binder", "binfmt_misc", + "bpf", "btrfs", "btrfs_test_fs", "ceph", "cgroup", "cgroup2", "cifs", + "coda", "configfs", "cpuset", "cramfs", "cxl", "dax", "debugfs", + "devpts", "devtmpfs", "dmabuf", "drm", "ecryptfs", "efivarfs", "efs", + "erofs", "exfat", "ext2", "ext3", "ext4", "f2fs", "functionfs", + "fuse", "fuseblk", "fusectl", "gadgetfs", "gfs2", "gfs2meta", "hfs", + "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem", + "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", + "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", + "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", + "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", + "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", + "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", + "zonefs", NULL }; + +static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, + size_t bufsize, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) +{ + size_t bufsize = 1 << 15; + struct statmount *buf = NULL, *tmp = alloca(bufsize); + int tofree = 0; + int ret; + + for (;;) { + ret = statmount(mnt_id, mask, tmp, bufsize, flags); + if (ret != -1) + break; + if (tofree) + free(tmp); + if (errno != EOVERFLOW) + return NULL; + bufsize <<= 1; + tofree = 1; + tmp = malloc(bufsize); + if (!tmp) + return NULL; + } + buf = malloc(tmp->size); + if (buf) + memcpy(buf, tmp, tmp->size); + if (tofree) + free(tmp); + + return buf; +} + +static void write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) + ksft_exit_fail_msg("opening %s for write: %s\n", path, strerror(errno)); + + ret = write(fd, val, len); + if (ret == -1) + ksft_exit_fail_msg("writing to %s: %s\n", path, strerror(errno)); + if (ret != len) + ksft_exit_fail_msg("short write to %s\n", path); + + ret = close(fd); + if (ret == -1) + ksft_exit_fail_msg("closing %s\n", path); +} + +static uint64_t get_mnt_id(const char *name, const char *path, uint64_t mask) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, mask, &sx); + if (ret == -1) + ksft_exit_fail_msg("retrieving %s mount ID for %s: %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name, strerror(errno)); + if (!(sx.stx_mask & mask)) + ksft_exit_fail_msg("no %s mount ID available for %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name); + + return sx.stx_mnt_id; +} + + +static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; +static int orig_root; +static uint64_t root_id, parent_id; +static uint32_t old_root_id, old_parent_id; + + +static void cleanup_namespace(void) +{ + fchdir(orig_root); + chroot("."); + umount2(root_mntpoint, MNT_DETACH); + rmdir(root_mntpoint); +} + +static void setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + write_file("/proc/self/uid_map", buf); + write_file("/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file("/proc/self/gid_map", buf); + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) + ksft_exit_fail_msg("making mount tree private: %s\n", + strerror(errno)); + + if (!mkdtemp(root_mntpoint)) + ksft_exit_fail_msg("creating temporary directory %s: %s\n", + root_mntpoint, strerror(errno)); + + old_parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID); + parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID_UNIQUE); + + orig_root = open("/", O_PATH); + if (orig_root == -1) + ksft_exit_fail_msg("opening root directory: %s", + strerror(errno)); + + atexit(cleanup_namespace); + + ret = mount(root_mntpoint, root_mntpoint, NULL, MS_BIND, NULL); + if (ret == -1) + ksft_exit_fail_msg("mounting temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret = chroot(root_mntpoint); + if (ret == -1) + ksft_exit_fail_msg("chroot to temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret = chdir("/"); + if (ret == -1) + ksft_exit_fail_msg("chdir to root: %s\n", strerror(errno)); + + old_root_id = get_mnt_id("root", "/", STATX_MNT_ID); + root_id = get_mnt_id("root", "/", STATX_MNT_ID_UNIQUE); +} + +static int setup_mount_tree(int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_REC|MS_SHARED, NULL); + if (ret == -1) { + ksft_test_result_fail("making mount tree shared: %s\n", + strerror(errno)); + return -1; + } + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + if (ret == -1) { + ksft_test_result_fail("mounting submount %s: %s\n", + root_mntpoint, strerror(errno)); + return -1; + } + } + return 0; +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, + uint64_t list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void test_listmount_empty_root(void) +{ + ssize_t res; + const unsigned int size = 32; + uint64_t list[size]; + + res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (res != 1) { + ksft_test_result_fail("listmount result is %zi != 1\n", res); + return; + } + + if (list[0] != root_id) { + ksft_test_result_fail("listmount ID doesn't match 0x%llx != 0x%llx\n", + (unsigned long long) list[0], + (unsigned long long) root_id); + return; + } + + ksft_test_result_pass("listmount empty root\n"); +} + +static void test_statmount_zero_mask(void) +{ + struct statmount sm; + int ret; + + ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount zero mask: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != 0) { + ksft_test_result_fail("unexpected mask: 0x%llx != 0x0\n", + (unsigned long long) sm.mask); + return; + } + + ksft_test_result_pass("statmount zero mask\n"); +} + +static void test_statmount_mnt_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_MNT_BASIC; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount mnt basic: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount mnt basic unavailable\n"); + return; + } + + if (sm.mnt_id != root_id) { + ksft_test_result_fail("unexpected root ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_id, + (unsigned long long) root_id); + return; + } + + if (sm.mnt_id_old != old_root_id) { + ksft_test_result_fail("unexpected old root ID: %u != %u\n", + sm.mnt_id_old, old_root_id); + return; + } + + if (sm.mnt_parent_id != parent_id) { + ksft_test_result_fail("unexpected parent ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_parent_id, + (unsigned long long) parent_id); + return; + } + + if (sm.mnt_parent_id_old != old_parent_id) { + ksft_test_result_fail("unexpected old parent ID: %u != %u\n", + sm.mnt_parent_id_old, old_parent_id); + return; + } + + if (sm.mnt_propagation != MS_PRIVATE) { + ksft_test_result_fail("unexpected propagation: 0x%llx\n", + (unsigned long long) sm.mnt_propagation); + return; + } + + ksft_test_result_pass("statmount mnt basic\n"); +} + + +static void test_statmount_sb_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_SB_BASIC; + struct statx sx; + struct statfs sf; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount sb basic: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount sb basic unavailable\n"); + return; + } + + ret = statx(AT_FDCWD, "/", 0, 0, &sx); + if (ret == -1) { + ksft_test_result_fail("stat root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_dev_major != sx.stx_dev_major || + sm.sb_dev_minor != sx.stx_dev_minor) { + ksft_test_result_fail("unexpected sb dev %u:%u != %u:%u\n", + sm.sb_dev_major, sm.sb_dev_minor, + sx.stx_dev_major, sx.stx_dev_minor); + return; + } + + ret = statfs("/", &sf); + if (ret == -1) { + ksft_test_result_fail("statfs root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_magic != sf.f_type) { + ksft_test_result_fail("unexpected sb magic: 0x%llx != 0x%lx\n", + (unsigned long long) sm.sb_magic, + sf.f_type); + return; + } + + ksft_test_result_pass("statmount sb basic\n"); +} + +static void test_statmount_mnt_point(void) +{ + struct statmount *sm; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount point: %s\n", + strerror(errno)); + return; + } + + if (strcmp(sm->str + sm->mnt_point, "/") != 0) { + ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", + sm->str + sm->mnt_point); + goto out; + } + ksft_test_result_pass("statmount mount point\n"); +out: + free(sm); +} + +static void test_statmount_mnt_root(void) +{ + struct statmount *sm; + const char *mnt_root, *last_dir, *last_root; + + last_dir = strrchr(root_mntpoint, '/'); + assert(last_dir); + last_dir++; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount root: %s\n", + strerror(errno)); + return; + } + mnt_root = sm->str + sm->mnt_root; + last_root = strrchr(mnt_root, '/'); + if (last_root) + last_root++; + else + last_root = mnt_root; + + if (strcmp(last_dir, last_root) != 0) { + ksft_test_result_fail("unexpected mount root last component: '%s' != '%s'\n", + last_root, last_dir); + goto out; + } + ksft_test_result_pass("statmount mount root\n"); +out: + free(sm); +} + +static void test_statmount_fs_type(void) +{ + struct statmount *sm; + const char *fs_type; + const char *const *s; + + sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0); + if (!sm) { + ksft_test_result_fail("statmount fs type: %s\n", + strerror(errno)); + return; + } + fs_type = sm->str + sm->fs_type; + for (s = known_fs; s != NULL; s++) { + if (strcmp(fs_type, *s) == 0) + break; + } + if (!s) + ksft_print_msg("unknown filesystem type: %s\n", fs_type); + + ksft_test_result_pass("statmount fs type\n"); + free(sm); +} + +static void test_statmount_string(uint64_t mask, size_t off, const char *name) +{ + struct statmount *sm; + size_t len, shortsize, exactsize; + uint32_t start, i; + int ret; + + sm = statmount_alloc(root_id, mask, 0); + if (!sm) { + ksft_test_result_fail("statmount %s: %s\n", name, + strerror(errno)); + goto out; + } + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto out; + } + if (sm->mask != mask) { + ksft_test_result_skip("statmount %s unavailable\n", name); + goto out; + } + len = sm->size - sizeof(*sm); + start = ((uint32_t *) sm)[off]; + + for (i = start;; i++) { + if (i >= len) { + ksft_test_result_fail("string out of bounds\n"); + goto out; + } + if (!sm->str[i]) + break; + } + exactsize = sm->size; + shortsize = sizeof(*sm) + i; + + ret = statmount(root_id, mask, sm, exactsize, 0); + if (ret == -1) { + ksft_test_result_fail("statmount exact size: %s\n", + strerror(errno)); + goto out; + } + errno = 0; + ret = statmount(root_id, mask, sm, shortsize, 0); + if (ret != -1 || errno != EOVERFLOW) { + ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", + strerror(errno)); + goto out; + } + + ksft_test_result_pass("statmount string %s\n", name); +out: + free(sm); +} + +static void test_listmount_tree(void) +{ + ssize_t res; + const unsigned int log2_num = 4; + const unsigned int step = 3; + const unsigned int size = (1 << log2_num) + step + 1; + size_t num, expect = 1 << log2_num; + uint64_t list[size]; + uint64_t list2[size]; + size_t i; + + + res = setup_mount_tree(log2_num); + if (res == -1) + return; + + num = res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (num != expect) { + ksft_test_result_fail("listmount result is %zi != %zi\n", + res, expect); + return; + } + + for (i = 0; i < size - step;) { + res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + if (res == -1) + ksft_test_result_fail("short listmount: %s\n", + strerror(errno)); + i += res; + if (res < step) + break; + } + if (i != num) { + ksft_test_result_fail("different number of entries: %zu != %zu\n", + i, num); + return; + } + for (i = 0; i < num; i++) { + if (list2[i] != list[i]) { + ksft_test_result_fail("different value for entry %zu: 0x%llx != 0x%llx\n", + i, + (unsigned long long) list2[i], + (unsigned long long) list[i]); + } + } + + ksft_test_result_pass("listmount tree\n"); +} + +#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t)) + +int main(void) +{ + int ret; + uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + + ksft_print_header(); + + ret = statmount(0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + setup_namespace(); + + ksft_set_plan(14); + test_listmount_empty_root(); + test_statmount_zero_mask(); + test_statmount_mnt_basic(); + test_statmount_sb_basic(); + test_statmount_mnt_root(); + test_statmount_mnt_point(); + test_statmount_fs_type(); + test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); + test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); + test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); + test_statmount_string(all_mask, str_off(mnt_root), "mount root & all"); + test_statmount_string(all_mask, str_off(mnt_point), "mount point & all"); + test_statmount_string(all_mask, str_off(fs_type), "fs type & all"); + + test_listmount_tree(); + + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 050e9751321c..ad9202335656 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -293,15 +293,13 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; unsigned long nr = nbits / 2; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) - if (!(i % 2)) - set_bit(i, (unsigned long *)bitmap); - ASSERT_EQ(nr, count); + for (i = 0; i < nbits; i += 2) + set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, bitmap, &out_dirty); @@ -311,9 +309,10 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ + for (i = 0; i < nbits; i++) { ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); - ASSERT_EQ(count, out_dirty); + } memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a5963ab9215b..3e0c36b8ddd5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -17,15 +17,6 @@ else ARCH_DIR := $(ARCH) endif -ifeq ($(ARCH),arm64) -arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/ -GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) -endif - LIBKVM += lib/assert.c LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c @@ -223,7 +214,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD\ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ @@ -233,6 +224,22 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ ifeq ($(ARCH),s390) CFLAGS += -march=z10 endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index 5ea78986e665..9d51b5691349 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -42,13 +42,12 @@ struct pmreg_sets { static uint64_t get_pmcr_n(uint64_t pmcr) { - return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) { - *pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); - *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT); + u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); } static uint64_t get_counters_mask(uint64_t n) diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index be7bf5224434..8274ef04301f 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -71,11 +71,12 @@ static const char *config_name(struct vcpu_reg_list *c) for_each_sublist(c, s) { if (!strcmp(s->name, "base")) continue; - strcat(c->name + len, s->name); - len += strlen(s->name) + 1; - c->name[len - 1] = '+'; + if (len) + c->name[len++] = '+'; + strcpy(c->name + len, s->name); + len += strlen(s->name); } - c->name[len - 1] = '\0'; + c->name[len] = '\0'; return c->name; } diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6bedaea95395..25de4b8bc347 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -458,8 +458,9 @@ void print_reg(const char *prefix, __u64 id) reg_size = "KVM_REG_SIZE_U128"; break; default: - printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,", - (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & REG_MASK); + printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); + return; } switch (id & KVM_REG_RISCV_TYPE_MASK) { @@ -496,8 +497,9 @@ void print_reg(const char *prefix, __u64 id) reg_size, sbi_ext_id_to_str(prefix, id)); break; default: - printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,", - reg_size, id & REG_MASK); + printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n", + reg_size, id & ~REG_MASK); + return; } } diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 18ac5c1952a3..83e25bccc139 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -259,7 +259,7 @@ int main(int argc, char **argv) __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test."); + "also handles environment setup for the test.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 18e1f86a6234..50818904397c 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -589,7 +589,7 @@ TEST_F_FORK(layout1, file_and_dir_access_rights) ASSERT_EQ(0, close(ruleset_fd)); } -TEST_F_FORK(layout0, unknown_access_rights) +TEST_F_FORK(layout0, ruleset_with_unknown_access) { __u64 access_mask; @@ -605,6 +605,67 @@ TEST_F_FORK(layout0, unknown_access_rights) } } +TEST_F_FORK(layout0, rule_with_unknown_access) +{ + __u64 access; + struct landlock_path_beneath_attr path_beneath = {}; + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = ACCESS_ALL, + }; + const int ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + + path_beneath.parent_fd = + open(TMP_DIR, O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) { + path_beneath.allowed_access = access; + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, + LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + EXPECT_EQ(EINVAL, errno); + } + ASSERT_EQ(0, close(path_beneath.parent_fd)); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, rule_with_unhandled_access) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + struct landlock_path_beneath_attr path_beneath = {}; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + for (access = 1; access > 0; access <<= 1) { + int err; + + path_beneath.allowed_access = access; + err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0); + if (access == ruleset_attr.handled_access_fs) { + EXPECT_EQ(0, err); + } else { + EXPECT_EQ(-1, err); + EXPECT_EQ(EINVAL, errno); + } + } + + EXPECT_EQ(0, close(path_beneath.parent_fd)); + EXPECT_EQ(0, close(ruleset_fd)); +} + static void add_path_beneath(struct __test_metadata *const _metadata, const int ruleset_fd, const __u64 allowed_access, const char *const path) @@ -3627,7 +3688,7 @@ FIXTURE_TEARDOWN(ftruncate) FIXTURE_VARIANT(ftruncate) { const __u64 handled; - const __u64 permitted; + const __u64 allowed; const int expected_open_result; const int expected_ftruncate_result; }; @@ -3636,7 +3697,7 @@ FIXTURE_VARIANT(ftruncate) FIXTURE_VARIANT_ADD(ftruncate, w_w) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3645,7 +3706,7 @@ FIXTURE_VARIANT_ADD(ftruncate, w_w) { FIXTURE_VARIANT_ADD(ftruncate, t_t) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3654,7 +3715,7 @@ FIXTURE_VARIANT_ADD(ftruncate, t_t) { FIXTURE_VARIANT_ADD(ftruncate, wt_w) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE, .expected_open_result = 0, .expected_ftruncate_result = EACCES, }; @@ -3663,8 +3724,7 @@ FIXTURE_VARIANT_ADD(ftruncate, wt_w) { FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE | - LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3673,7 +3733,7 @@ FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { FIXTURE_VARIANT_ADD(ftruncate, wt_t) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = EACCES, }; @@ -3683,7 +3743,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate) const struct rule rules[] = { { .path = path, - .access = variant->permitted, + .access = variant->allowed, }, {}, }; @@ -3724,7 +3784,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) const struct rule rules[] = { { .path = path, - .access = variant->permitted, + .access = variant->allowed, }, {}, }; diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 929e21c4db05..ea5f727dd257 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -1260,7 +1260,7 @@ TEST_F(mini, network_access_rights) } /* Checks invalid attribute, out of landlock network access range. */ -TEST_F(mini, unknown_access_rights) +TEST_F(mini, ruleset_with_unknown_access) { __u64 access_mask; @@ -1276,6 +1276,63 @@ TEST_F(mini, unknown_access_rights) } } +TEST_F(mini, rule_with_unknown_access) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = ACCESS_ALL, + }; + struct landlock_net_port_attr net_port = { + .port = sock_port_start, + }; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) { + net_port.allowed_access = access; + EXPECT_EQ(-1, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0)); + EXPECT_EQ(EINVAL, errno); + } + EXPECT_EQ(0, close(ruleset_fd)); +} + +TEST_F(mini, rule_with_unhandled_access) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP, + }; + struct landlock_net_port_attr net_port = { + .port = sock_port_start, + }; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + for (access = 1; access > 0; access <<= 1) { + int err; + + net_port.allowed_access = access; + err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0); + if (access == ruleset_attr.handled_access_net) { + EXPECT_EQ(0, err); + } else { + EXPECT_EQ(-1, err); + EXPECT_EQ(EINVAL, errno); + } + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_F(mini, inval) { const struct landlock_ruleset_attr ruleset_attr = { diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 118e0964bda9..aa646e0661f3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,26 +44,10 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) +ifeq ($(KHDR_INCLUDES),) +KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif -ifneq ($(KBUILD_OUTPUT),) - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot - # expand a shell special character '~'. We use a somewhat tedious way here. - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) - $(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - # $(realpath ...) resolves symlinks - abs_objtree := $(realpath $(abs_objtree)) - KHDR_DIR := ${abs_objtree}/usr/include -else - abs_srctree := $(shell cd $(top_srcdir) && pwd) - KHDR_DIR := ${abs_srctree}/usr/include -endif - -KHDR_INCLUDES := -isystem $(KHDR_DIR) - # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_GEN_FILES) - -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) define RUN_TESTS BASE_DIR="$(selfdir)"; \ diff --git a/tools/testing/selftests/lsm/.gitignore b/tools/testing/selftests/lsm/.gitignore new file mode 100644 index 000000000000..bd68f6c3fd07 --- /dev/null +++ b/tools/testing/selftests/lsm/.gitignore @@ -0,0 +1 @@ +/*_test diff --git a/tools/testing/selftests/lsm/Makefile b/tools/testing/selftests/lsm/Makefile new file mode 100644 index 000000000000..3f80c0bc093d --- /dev/null +++ b/tools/testing/selftests/lsm/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# First run: make -C ../../../.. headers_install + +CFLAGS += -Wall -O2 $(KHDR_INCLUDES) +LOCAL_HDRS += common.h + +TEST_GEN_PROGS := lsm_get_self_attr_test lsm_list_modules_test \ + lsm_set_self_attr_test + +include ../lib.mk + +$(OUTPUT)/lsm_get_self_attr_test: lsm_get_self_attr_test.c common.c +$(OUTPUT)/lsm_set_self_attr_test: lsm_set_self_attr_test.c common.c +$(OUTPUT)/lsm_list_modules_test: lsm_list_modules_test.c common.c + +EXTRA_CLEAN = $(OUTPUT)/common.o diff --git a/tools/testing/selftests/lsm/common.c b/tools/testing/selftests/lsm/common.c new file mode 100644 index 000000000000..9ad258912646 --- /dev/null +++ b/tools/testing/selftests/lsm/common.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include "common.h" + +#define PROCATTR "/proc/self/attr/" + +int read_proc_attr(const char *attr, char *value, size_t size) +{ + int fd; + int len; + char *path; + + len = strlen(PROCATTR) + strlen(attr) + 1; + path = calloc(len, 1); + if (path == NULL) + return -1; + sprintf(path, "%s%s", PROCATTR, attr); + + fd = open(path, O_RDONLY); + free(path); + + if (fd < 0) + return -1; + len = read(fd, value, size); + + close(fd); + + /* Ensure value is terminated */ + if (len <= 0 || len == size) + return -1; + value[len] = '\0'; + + path = strchr(value, '\n'); + if (path) + *path = '\0'; + + return 0; +} + +int read_sysfs_lsms(char *lsms, size_t size) +{ + FILE *fp; + size_t red; + + fp = fopen("/sys/kernel/security/lsm", "r"); + if (fp == NULL) + return -1; + red = fread(lsms, 1, size, fp); + fclose(fp); + + if (red <= 0 || red == size) + return -1; + lsms[red] = '\0'; + return 0; +} + +int attr_lsm_count(void) +{ + char *names = calloc(sysconf(_SC_PAGESIZE), 1); + int count = 0; + + if (!names) + return 0; + + if (read_sysfs_lsms(names, sysconf(_SC_PAGESIZE))) + return 0; + + if (strstr(names, "selinux")) + count++; + if (strstr(names, "smack")) + count++; + if (strstr(names, "apparmor")) + count++; + + return count; +} diff --git a/tools/testing/selftests/lsm/common.h b/tools/testing/selftests/lsm/common.h new file mode 100644 index 000000000000..d404329e5eeb --- /dev/null +++ b/tools/testing/selftests/lsm/common.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com> + */ + +#ifndef lsm_get_self_attr +static inline int lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t *size, __u32 flags) +{ + return syscall(__NR_lsm_get_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_set_self_attr +static inline int lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t size, __u32 flags) +{ + return syscall(__NR_lsm_set_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_list_modules +static inline int lsm_list_modules(__u64 *ids, size_t *size, __u32 flags) +{ + return syscall(__NR_lsm_list_modules, ids, size, flags); +} +#endif + +extern int read_proc_attr(const char *attr, char *value, size_t size); +extern int read_sysfs_lsms(char *lsms, size_t size); +int attr_lsm_count(void); diff --git a/tools/testing/selftests/lsm/config b/tools/testing/selftests/lsm/config new file mode 100644 index 000000000000..1c0c4c020f9c --- /dev/null +++ b/tools/testing/selftests/lsm/config @@ -0,0 +1,3 @@ +CONFIG_SYSFS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c new file mode 100644 index 000000000000..e0e313d9047a --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_get_self_attr system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp) +{ + void *vp; + + vp = (void *)ctxp + sizeof(*ctxp) + ctxp->ctx_len; + return (struct lsm_ctx *)vp; +} + +TEST(size_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, NULL, 0)); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(ctx_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + int rc; + + rc = lsm_get_self_attr(LSM_ATTR_CURRENT, NULL, &size, 0); + + if (attr_lsm_count()) { + ASSERT_NE(-1, rc); + ASSERT_NE(1, size); + } else { + ASSERT_EQ(-1, rc); + } +} + +TEST(size_too_small_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = 1; + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); + if (attr_lsm_count()) { + ASSERT_EQ(E2BIG, errno); + } else { + ASSERT_EQ(EOPNOTSUPP, errno); + } + ASSERT_NE(1, size); + + free(ctx); +} + +TEST(flags_zero_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size; + int lsmcount; + int i; + + ASSERT_NE(NULL, ctx); + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + ASSERT_NE(NULL, syscall_lsms); + + for (i = 0; i < lsmcount; i++) { + errno = 0; + size = page_size; + ctx->id = syscall_lsms[i]; + + if (syscall_lsms[i] == LSM_ID_SELINUX || + syscall_lsms[i] == LSM_ID_SMACK || + syscall_lsms[i] == LSM_ID_APPARMOR) { + ASSERT_EQ(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, LSM_FLAG_SINGLE)); + } else { + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, + LSM_FLAG_SINGLE)); + } + } + + free(ctx); +} + +TEST(flags_overset_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size; + + ASSERT_NE(NULL, ctx); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, + &size, 0)); + ASSERT_EQ(EOPNOTSUPP, errno); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE | + (LSM_FLAG_SINGLE << 1))); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(basic_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + struct lsm_ctx *ctx = calloc(page_size, 1); + struct lsm_ctx *tctx = NULL; + __u64 *syscall_lsms = calloc(page_size, 1); + char *attr = calloc(page_size, 1); + int cnt_current = 0; + int cnt_exec = 0; + int cnt_fscreate = 0; + int cnt_keycreate = 0; + int cnt_prev = 0; + int cnt_sockcreate = 0; + int lsmcount; + int count; + int i; + + ASSERT_NE(NULL, ctx); + ASSERT_NE(NULL, syscall_lsms); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + + for (i = 0; i < lsmcount; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_SELINUX: + cnt_current++; + cnt_exec++; + cnt_fscreate++; + cnt_keycreate++; + cnt_prev++; + cnt_sockcreate++; + break; + case LSM_ID_SMACK: + cnt_current++; + break; + case LSM_ID_APPARMOR: + cnt_current++; + cnt_exec++; + cnt_prev++; + break; + default: + break; + } + } + + if (cnt_current) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0); + ASSERT_EQ(cnt_current, count); + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("current", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_exec) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_EXEC, ctx, &size, 0); + ASSERT_GE(cnt_exec, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("exec", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_fscreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_FSCREATE, ctx, &size, 0); + ASSERT_GE(cnt_fscreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("fscreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_keycreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_KEYCREATE, ctx, &size, 0); + ASSERT_GE(cnt_keycreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("keycreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_prev) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_PREV, ctx, &size, 0); + ASSERT_GE(cnt_prev, count); + if (count > 0) { + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("prev", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + } + if (cnt_sockcreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_SOCKCREATE, ctx, &size, 0); + ASSERT_GE(cnt_sockcreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("sockcreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + + free(ctx); + free(attr); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c new file mode 100644 index 000000000000..9df29b1e3497 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_list_modules system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +TEST(size_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, NULL, 0)); + ASSERT_EQ(EFAULT, errno); + + free(syscall_lsms); +} + +TEST(ids_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(NULL, &size, 0)); + ASSERT_EQ(EFAULT, errno); + ASSERT_NE(1, size); +} + +TEST(size_too_small_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size = 1; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 0)); + ASSERT_EQ(E2BIG, errno); + ASSERT_NE(1, size); + + free(syscall_lsms); +} + +TEST(flags_set_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 7)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + free(syscall_lsms); +} + +TEST(correct_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + __u64 *syscall_lsms = calloc(page_size, 1); + char *sysfs_lsms = calloc(page_size, 1); + char *name; + char *cp; + int count; + int i; + + ASSERT_NE(NULL, sysfs_lsms); + ASSERT_NE(NULL, syscall_lsms); + ASSERT_EQ(0, read_sysfs_lsms(sysfs_lsms, page_size)); + + count = lsm_list_modules(syscall_lsms, &size, 0); + ASSERT_LE(1, count); + cp = sysfs_lsms; + for (i = 0; i < count; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_CAPABILITY: + name = "capability"; + break; + case LSM_ID_SELINUX: + name = "selinux"; + break; + case LSM_ID_SMACK: + name = "smack"; + break; + case LSM_ID_TOMOYO: + name = "tomoyo"; + break; + case LSM_ID_APPARMOR: + name = "apparmor"; + break; + case LSM_ID_YAMA: + name = "yama"; + break; + case LSM_ID_LOADPIN: + name = "loadpin"; + break; + case LSM_ID_SAFESETID: + name = "safesetid"; + break; + case LSM_ID_LOCKDOWN: + name = "lockdown"; + break; + case LSM_ID_BPF: + name = "bpf"; + break; + case LSM_ID_LANDLOCK: + name = "landlock"; + break; + default: + name = "INVALID"; + break; + } + ASSERT_EQ(0, strncmp(cp, name, strlen(name))); + cp += strlen(name) + 1; + } + + free(sysfs_lsms); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c new file mode 100644 index 000000000000..e9712c6cf596 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_set_self_attr system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +TEST(ctx_null_lsm_set_self_attr) +{ + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, NULL, + sizeof(struct lsm_ctx), 0)); +} + +TEST(size_too_small_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, 1, 0)); + + free(ctx); +} + +TEST(flags_zero_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, size, 1)); + + free(ctx); +} + +TEST(flags_overset_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + char *ctx = calloc(page_size, 1); + size_t size = page_size; + struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + size, 0)); + + free(ctx); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index cc920c79ff1c..4ff10ea61461 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -45,3 +45,4 @@ mdwe_test gup_longterm mkdirty va_high_addr_switch +hugetlb_fault_after_madv diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 78dfec8bc676..2453add65d12 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -60,7 +60,7 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit -TEST_GEN_PROGS += pagemap_ioctl +TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -72,7 +72,7 @@ TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv ifneq ($(ARCH),arm64) -TEST_GEN_PROGS += soft-dirty +TEST_GEN_FILES += soft-dirty endif ifeq ($(ARCH),x86_64) @@ -117,8 +117,8 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk -$(TEST_GEN_PROGS): vm_util.c -$(TEST_GEN_FILES): vm_util.c +$(TEST_GEN_PROGS): vm_util.c thp_settings.c +$(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 9b420140ba2b..656afba02dbc 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -33,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) FILE *cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -44,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -62,14 +62,14 @@ int prereq(void) fd = open("/proc/sys/vm/compact_unevictable_allowed", O_RDONLY | O_NONBLOCK); if (fd < 0) { - perror("Failed to open\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); return -1; } if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { - perror("Failed to read from\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); close(fd); return -1; } @@ -78,12 +78,13 @@ int prereq(void) if (allowed == '1') return 0; + ksft_print_msg("Compaction isn't allowed\n"); return -1; } int check_compaction(unsigned long mem_free, unsigned int hugepage_size) { - int fd; + int fd, ret = -1; int compaction_index = 0; char initial_nr_hugepages[10] = {0}; char nr_hugepages[10] = {0}; @@ -94,18 +95,21 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); if (fd < 0) { - perror("Failed to open /proc/sys/vm/nr_hugepages"); + ksft_test_result_fail("Failed to open /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); return -1; } if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { - perror("Failed to read from /proc/sys/vm/nr_hugepages"); + ksft_test_result_fail("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } /* Start with the initial condition of 0 huge pages*/ if (write(fd, "0", sizeof(char)) != sizeof(char)) { - perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -114,14 +118,16 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) /* Request a large number of huge pages. The Kernel will allocate as much as it can */ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } lseek(fd, 0, SEEK_SET); if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { - perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -129,67 +135,58 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) huge pages */ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); - if (compaction_index > 3) { - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" - "as huge pages\n", compaction_index); - goto close_fd; - } - - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - lseek(fd, 0, SEEK_SET); if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { - perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } - close(fd); - return 0; + if (compaction_index > 3) { + ksft_print_msg("ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + ksft_test_result_fail("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + goto close_fd; + } + + ksft_test_result_pass("Memory compaction succeeded. No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + ret = 0; close_fd: close(fd); - printf("Not OK. Compaction test failed."); - return -1; + return ret; } int main(int argc, char **argv) { struct rlimit lim; - struct map_list *list, *entry; + struct map_list *list = NULL, *entry; size_t page_size, i; void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; long mem_fragmentable_MB = 0; - if (prereq() != 0) { - printf("Either the sysctl compact_unevictable_allowed is not\n" - "set to 1 or couldn't read the proc file.\n" - "Skipping the test\n"); - return KSFT_SKIP; - } + ksft_print_header(); + + if (prereq() || geteuid()) + return ksft_exit_pass(); + + ksft_set_plan(1); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_MEMLOCK, &lim)) { - perror("Failed to set rlimit:\n"); - return -1; - } + if (setrlimit(RLIMIT_MEMLOCK, &lim)) + ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno)); page_size = getpagesize(); - list = NULL; - - if (read_memory_info(&mem_free, &hugepage_size) != 0) { - printf("ERROR: Cannot read meminfo\n"); - return -1; - } + if (read_memory_info(&mem_free, &hugepage_size) != 0) + ksft_exit_fail_msg("Failed to get meminfo\n"); mem_fragmentable_MB = mem_free * 0.8 / 1024; @@ -225,7 +222,7 @@ int main(int argc, char **argv) } if (check_compaction(mem_free, hugepage_size) == 0) - return 0; + return ksft_exit_pass(); - return -1; + return ksft_exit_fail(); } diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 7324ce5363c0..363bf5f801be 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -29,15 +29,49 @@ #include "../../../../mm/gup_test.h" #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" static size_t pagesize; static int pagemap_fd; -static size_t thpsize; +static size_t pmdsize; +static int nr_thpsizes; +static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!pmdsize) + return 0; + + orders = 1UL << sz2ord(pmdsize); + orders |= thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + static void detect_huge_zeropage(void) { int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", @@ -734,7 +768,7 @@ enum thp_run { THP_RUN_PARTIAL_SHARED, }; -static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) { char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; size_t size, mmap_size, mremap_size; @@ -759,11 +793,11 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) } /* - * Try to populate a THP. Touch the first sub-page and test if we get - * another sub-page populated automatically. + * Try to populate a THP. Touch the first sub-page and test if + * we get the last sub-page populated automatically. */ mem[0] = 0; - if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } @@ -773,12 +807,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) switch (thp_run) { case THP_RUN_PMD: case THP_RUN_PMD_SWAPOUT: + assert(thpsize == pmdsize); break; case THP_RUN_PTE: case THP_RUN_PTE_SWAPOUT: /* * Trigger PTE-mapping the THP by temporarily mapping a single - * subpage R/O. + * subpage R/O. This is a noop if the THP is not pmdsize (and + * therefore already PTE-mapped). */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { @@ -875,52 +911,60 @@ munmap: munmap(mremap_mem, mremap_size); } -static void run_with_thp(test_fn fn, const char *desc) +static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD); + ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD, size); } -static void run_with_thp_swap(test_fn fn, const char *desc) +static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } -static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE); + ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE, size); } -static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } -static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE); + ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } -static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } -static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } -static void run_with_partial_shared_thp(test_fn fn, const char *desc) +static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); + ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) @@ -1091,15 +1135,27 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (thpsize) { - run_with_thp(test_case->fn, test_case->desc); - run_with_thp_swap(test_case->fn, test_case->desc); - run_with_pte_mapped_thp(test_case->fn, test_case->desc); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); - run_with_single_pte_of_thp(test_case->fn, test_case->desc); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); - run_with_partial_mremap_thp(test_case->fn, test_case->desc); - run_with_partial_shared_thp(test_case->fn, test_case->desc); + for (i = 0; i < nr_thpsizes; i++) { + size_t size = thpsizes[i]; + struct thp_settings settings = *thp_current_settings(); + + settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + thp_push_settings(&settings); + + if (size == pmdsize) { + run_with_thp(test_case->fn, test_case->desc, size); + run_with_thp_swap(test_case->fn, test_case->desc, size); + } + + run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); + run_with_partial_shared_thp(test_case->fn, test_case->desc, size); + + thp_pop_settings(); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1120,8 +1176,9 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; - if (thpsize) - tests += 8; + tests += 6 * nr_thpsizes; + if (pmdsize) + tests += 2; return tests; } @@ -1329,7 +1386,7 @@ static void run_anon_thp_test_cases(void) { int i; - if (!thpsize) + if (!pmdsize) return; ksft_print_msg("[INFO] Anonymous THP tests\n"); @@ -1338,13 +1395,13 @@ static void run_anon_thp_test_cases(void) struct test_case const *test_case = &anon_thp_test_cases[i]; ksft_print_msg("[RUN] %s\n", test_case->desc); - do_run_with_thp(test_case->fn, THP_RUN_PMD); + do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } static int tests_per_anon_thp_test_case(void) { - return thpsize ? 1 : 0; + return pmdsize ? 1 : 0; } typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); @@ -1419,7 +1476,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* For alignment purposes, we need twice the thp size. */ - mmap_size = 2 * thpsize; + mmap_size = 2 * pmdsize; mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { @@ -1434,11 +1491,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* We need a THP-aligned memory area. */ - mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); - smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); - ret = madvise(mem, thpsize, MADV_HUGEPAGE); - ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); if (ret) { ksft_test_result_fail("MADV_HUGEPAGE failed\n"); goto munmap; @@ -1457,7 +1514,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) goto munmap; } - fn(mem, smem, thpsize); + fn(mem, smem, pmdsize); munmap: munmap(mmap_mem, mmap_size); if (mmap_smem != MAP_FAILED) @@ -1650,7 +1707,7 @@ static void run_non_anon_test_case(struct non_anon_test_case const *test_case) run_with_zeropage(test_case->fn, test_case->desc); run_with_memfd(test_case->fn, test_case->desc); run_with_tmpfile(test_case->fn, test_case->desc); - if (thpsize) + if (pmdsize) run_with_huge_zeropage(test_case->fn, test_case->desc); for (i = 0; i < nr_hugetlbsizes; i++) run_with_memfd_hugetlb(test_case->fn, test_case->desc, @@ -1671,7 +1728,7 @@ static int tests_per_non_anon_test_case(void) { int tests = 3 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 1; return tests; } @@ -1679,17 +1736,27 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { int err; + struct thp_settings default_settings; + + ksft_print_header(); pagesize = getpagesize(); - thpsize = read_pmd_pagesize(); - if (thpsize) - ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - thpsize / 1024); + pmdsize = read_pmd_pagesize(); + if (pmdsize) { + /* Only if THP is supported. */ + thp_read_settings(&default_settings); + default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + thp_save_settings(); + thp_push_settings(&default_settings); + + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", + pmdsize / 1024); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); + } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); @@ -1703,6 +1770,11 @@ int main(int argc, char **argv) run_anon_thp_test_cases(); run_non_anon_test_cases(); + if (pmdsize) { + /* Only if THP is supported. */ + thp_restore_settings(); + } + err = ksft_get_fail_cnt(); if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index ec2229136384..cbe99594d319 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -50,39 +50,41 @@ static char *cmd_to_str(unsigned long cmd) void *gup_thread(void *data) { struct gup_test gup = *(struct gup_test *)data; - int i; + int i, status; /* Only report timing information on the *_BENCHMARK commands: */ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) || (cmd == PIN_LONGTERM_BENCHMARK)) { for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(gup_fd, cmd, &gup)) - perror("ioctl"), exit(1); + status = ioctl(gup_fd, cmd, &gup); + if (status) + break; pthread_mutex_lock(&print_mutex); - printf("%s: Time: get:%lld put:%lld us", - cmd_to_str(cmd), gup.get_delta_usec, - gup.put_delta_usec); + ksft_print_msg("%s: Time: get:%lld put:%lld us", + cmd_to_str(cmd), gup.get_delta_usec, + gup.put_delta_usec); if (gup.size != size) - printf(", truncated (size: %lld)", gup.size); - printf("\n"); + ksft_print_msg(", truncated (size: %lld)", gup.size); + ksft_print_msg("\n"); pthread_mutex_unlock(&print_mutex); } } else { gup.size = size; - if (ioctl(gup_fd, cmd, &gup)) { - perror("ioctl"); - exit(1); - } + status = ioctl(gup_fd, cmd, &gup); + if (status) + goto return_; pthread_mutex_lock(&print_mutex); - printf("%s: done\n", cmd_to_str(cmd)); + ksft_print_msg("%s: done\n", cmd_to_str(cmd)); if (gup.size != size) - printf("Truncated (size: %lld)\n", gup.size); + ksft_print_msg("Truncated (size: %lld)\n", gup.size); pthread_mutex_unlock(&print_mutex); } +return_: + ksft_test_result(!status, "ioctl status %d\n", status); return NULL; } @@ -170,7 +172,7 @@ int main(int argc, char **argv) touch = 1; break; default: - return -1; + ksft_exit_fail_msg("Wrong argument\n"); } } @@ -198,11 +200,12 @@ int main(int argc, char **argv) } } + ksft_print_header(); + ksft_set_plan(nthreads); + filed = open(file, O_RDWR|O_CREAT); - if (filed < 0) { - perror("open"); - exit(filed); - } + if (filed < 0) + ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno)); gup.nr_pages_per_call = nr_pages; if (write) @@ -213,27 +216,24 @@ int main(int argc, char **argv) switch (errno) { case EACCES: if (getuid()) - printf("Please run this test as root\n"); + ksft_print_msg("Please run this test as root\n"); break; case ENOENT: - if (opendir("/sys/kernel/debug") == NULL) { - printf("mount debugfs at /sys/kernel/debug\n"); - break; - } - printf("check if CONFIG_GUP_TEST is enabled in kernel config\n"); + if (opendir("/sys/kernel/debug") == NULL) + ksft_print_msg("mount debugfs at /sys/kernel/debug\n"); + ksft_print_msg("check if CONFIG_GUP_TEST is enabled in kernel config\n"); break; default: - perror("failed to open " GUP_TEST_FILE); + ksft_print_msg("failed to open %s: %s\n", GUP_TEST_FILE, strerror(errno)); break; } - exit(KSFT_SKIP); + ksft_test_result_skip("Please run this test as root\n"); + return ksft_exit_pass(); } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (p == MAP_FAILED) + ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); gup.addr = (unsigned long)p; if (thp == 1) @@ -264,7 +264,8 @@ int main(int argc, char **argv) ret = pthread_join(tid[i], NULL); assert(ret == 0); } + free(tid); - return 0; + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 955ef87f382c..267eea2e0e0b 100644 --- a/tools/testing/selftests/mm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -22,6 +22,7 @@ #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> +#include "../kselftest.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -37,7 +38,7 @@ static void check_bytes(char *addr) { - printf("First hex is %x\n", *((unsigned int *)addr)); + ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); } static void write_bytes(char *addr) @@ -55,7 +56,7 @@ static int read_bytes(char *addr) check_bytes(addr); for (i = 0; i < LENGTH; i++) if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); + ksft_print_msg("Error: Mismatch at %lu\n", i); return 1; } return 0; @@ -66,20 +67,20 @@ int main(void) void *addr; int fd, ret; + ksft_print_header(); + ksft_set_plan(1); + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); - if (fd < 0) { - perror("memfd_create() failed"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { - perror("mmap"); close(fd); - exit(1); + ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); } - printf("Returned address is %p\n", addr); + ksft_print_msg("Returned address is %p\n", addr); check_bytes(addr); write_bytes(addr); ret = read_bytes(addr); @@ -87,5 +88,7 @@ int main(void) munmap(addr, LENGTH); close(fd); - return ret; + ksft_test_result(!ret, "Read same data\n"); + + ksft_exit(!ret); } diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index cabd0084f57b..c463d1c09c9b 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -24,6 +24,7 @@ #include <sys/ioctl.h> #include <string.h> #include <stdbool.h> +#include "../kselftest.h" #include "vm_util.h" #define DEFAULT_LENGTH_MB 10UL @@ -34,7 +35,7 @@ static void check_bytes(char *addr) { - printf("First hex is %x\n", *((unsigned int *)addr)); + ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); } static void write_bytes(char *addr, size_t len) @@ -52,7 +53,7 @@ static int read_bytes(char *addr, size_t len) check_bytes(addr); for (i = 0; i < len; i++) if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); + ksft_print_msg("Mismatch at %lu\n", i); return 1; } return 0; @@ -66,17 +67,13 @@ static void register_region_with_uffd(char *addr, size_t len) /* Create and enable userfaultfd object. */ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) { - perror("userfaultfd"); - exit(1); - } + if (uffd == -1) + ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno)); uffdio_api.api = UFFD_API; uffdio_api.features = 0; - if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { - perror("ioctl-UFFDIO_API"); - exit(1); - } + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) + ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno)); /* Create a private anonymous mapping. The memory will be * demand-zero paged--that is, not yet allocated. When we @@ -86,21 +83,17 @@ static void register_region_with_uffd(char *addr, size_t len) addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); - printf("Address returned by mmap() = %p\n", addr); + ksft_print_msg("Address returned by mmap() = %p\n", addr); /* Register the memory range of the mapping we just created for * handling by the userfaultfd object. In mode, we request to track * missing pages (i.e., pages that have not yet been faulted in). */ - if (uffd_register(uffd, addr, len, true, false, false)) { - perror("ioctl-UFFDIO_REGISTER"); - exit(1); - } + if (uffd_register(uffd, addr, len, true, false, false)) + ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno)); } int main(int argc, char *argv[]) @@ -108,10 +101,11 @@ int main(int argc, char *argv[]) size_t length = 0; int ret = 0, fd; - if (argc >= 2 && !strcmp(argv[1], "-h")) { - printf("Usage: %s [length_in_MB]\n", argv[0]); - exit(1); - } + ksft_print_header(); + ksft_set_plan(1); + + if (argc >= 2 && !strcmp(argv[1], "-h")) + ksft_exit_fail_msg("Usage: %s [length_in_MB]\n", argv[0]); /* Read memory length as the first arg if valid, otherwise fallback to * the default length. @@ -123,50 +117,40 @@ int main(int argc, char *argv[]) length = MB_TO_BYTES(length); fd = memfd_create(argv[0], MFD_HUGETLB); - if (fd < 0) { - perror("Open failed"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("Open failed: %s\n", strerror(errno)); /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */ unsigned long suggested_addr = 0x7eaa40000000; void *haddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); - printf("Map haddr: Returned address is %p\n", haddr); - if (haddr == MAP_FAILED) { - perror("mmap1"); - exit(1); - } + ksft_print_msg("Map haddr: Returned address is %p\n", haddr); + if (haddr == MAP_FAILED) + ksft_exit_fail_msg("mmap1: %s\n", strerror(errno)); /* mmap again to a dummy address to hopefully trigger pmd sharing. */ suggested_addr = 0x7daa40000000; void *daddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); - printf("Map daddr: Returned address is %p\n", daddr); - if (daddr == MAP_FAILED) { - perror("mmap3"); - exit(1); - } + ksft_print_msg("Map daddr: Returned address is %p\n", daddr); + if (daddr == MAP_FAILED) + ksft_exit_fail_msg("mmap3: %s\n", strerror(errno)); suggested_addr = 0x7faa40000000; void *vaddr = mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0); - printf("Map vaddr: Returned address is %p\n", vaddr); - if (vaddr == MAP_FAILED) { - perror("mmap2"); - exit(1); - } + ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr); + if (vaddr == MAP_FAILED) + ksft_exit_fail_msg("mmap2: %s\n", strerror(errno)); register_region_with_uffd(haddr, length); void *addr = mremap(haddr, length, length, MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); - if (addr == MAP_FAILED) { - perror("mremap"); - exit(1); - } + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mremap: %s\n", strerror(errno)); - printf("Mremap: Returned address is %p\n", addr); + ksft_print_msg("Mremap: Returned address is %p\n", addr); check_bytes(addr); write_bytes(addr, length); ret = read_bytes(addr, length); @@ -174,12 +158,11 @@ int main(int argc, char *argv[]) munmap(addr, length); addr = mremap(addr, length, length, 0); - if (addr != MAP_FAILED) { - printf("mremap: Expected failure, but call succeeded\n"); - exit(1); - } + if (addr != MAP_FAILED) + ksft_exit_fail_msg("mremap: Expected failure, but call succeeded\n"); close(fd); - return ret; + ksft_test_result(!ret, "Read same data\n"); + ksft_exit(!ret); } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 030667cb5533..829320a519e7 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -22,13 +22,14 @@ #include "linux/magic.h" #include "vm_util.h" +#include "thp_settings.h" #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; +static int anon_order; -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -71,78 +72,7 @@ struct file_info { }; static struct file_info finfo; - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool use_zero_page; - struct khugepaged_settings khugepaged; - unsigned long read_ahead_kb; -}; - -static struct settings saved_settings; static bool skip_settings_restore; - static int exit_status; static void success(const char *msg) @@ -161,260 +91,34 @@ static void skip(const char *msg) printf(" \e[33m%s\e[0m\n", msg); } -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) { - printf("open(%s)\n", path); - exit(EXIT_FAILURE); - return 0; - } - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) { - printf("write(%s)\n", buf); - exit(EXIT_FAILURE); - return 0; - } - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) +static void restore_settings_atexit(void) { - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long _read_num(const char *path) -{ - char buf[21]; - - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return _read_num(path); -} - -static void _write_num(const char *path, unsigned long num) -{ - char buf[21]; - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - _write_num(path, num); -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); - - if (file_ops && finfo.type == VMA_FILE) - _write_num(finfo.dev_queue_read_ahead_path, - settings->read_ahead_kb); -} - -#define MAX_SETTINGS_DEPTH 4 -static struct settings settings_stack[MAX_SETTINGS_DEPTH]; -static int settings_index; - -static struct settings *current_settings(void) -{ - if (!settings_index) { - printf("Fail: No settings set"); - exit(EXIT_FAILURE); - } - return settings_stack + settings_index - 1; -} + if (skip_settings_restore) + return; -static void push_settings(struct settings *settings) -{ - if (settings_index >= MAX_SETTINGS_DEPTH) { - printf("Fail: Settings stack exceeded"); - exit(EXIT_FAILURE); - } - settings_stack[settings_index++] = *settings; - write_settings(current_settings()); -} + printf("Restore THP and khugepaged settings..."); + thp_restore_settings(); + success("OK"); -static void pop_settings(void) -{ - if (settings_index <= 0) { - printf("Fail: Settings stack empty"); - exit(EXIT_FAILURE); - } - --settings_index; - write_settings(current_settings()); + skip_settings_restore = true; } static void restore_settings(int sig) { - if (skip_settings_restore) - goto out; - - printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); - success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); + /* exit() will invoke the restore_settings_atexit handler. */ + exit(sig ? EXIT_FAILURE : exit_status); } static void save_settings(void) { printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; if (file_ops && finfo.type == VMA_FILE) - saved_settings.read_ahead_kb = - _read_num(finfo.dev_queue_read_ahead_path); + thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path); + thp_save_settings(); success("OK"); + atexit(restore_settings_atexit); signal(SIGTERM, restore_settings); signal(SIGINT, restore_settings); signal(SIGHUP, restore_settings); @@ -793,7 +497,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect) { int ret; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); printf("%s...", msg); @@ -803,7 +507,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, */ settings.thp_enabled = THP_NEVER; settings.shmem_enabled = SHMEM_NEVER; - push_settings(&settings); + thp_push_settings(&settings); /* Clear VM_NOHUGEPAGE */ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); @@ -815,7 +519,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, else success("OK"); - pop_settings(); + thp_pop_settings(); } static void madvise_collapse(const char *msg, char *p, int nr_hpages, @@ -845,13 +549,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; + full_scans = thp_read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { if (ops->check_huge(p, nr_hpages)) break; - if (read_num("khugepaged/full_scans") >= full_scans) + if (thp_read_num("khugepaged/full_scans") >= full_scans) break; printf("."); usleep(TICK); @@ -904,13 +608,18 @@ static bool is_tmpfs(struct mem_ops *ops) return ops == &__file_ops && finfo.type == VMA_SHMEM; } +static bool is_anon(struct mem_ops *ops) +{ + return ops == &__anon_ops; +} + static void alloc_at_fault(void) { - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - push_settings(&settings); + thp_push_settings(&settings); p = alloc_mapping(1); *p = 1; @@ -920,7 +629,7 @@ static void alloc_at_fault(void) else fail("Fail"); - pop_settings(); + thp_pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); @@ -968,11 +677,12 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); void *p; + int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1; settings.khugepaged.max_ptes_none = max_ptes_none; - push_settings(&settings); + thp_push_settings(&settings); p = ops->setup_area(1); @@ -983,10 +693,10 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o goto skip; } - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, ops, !c->enforce_pte_scan_limits); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); if (c->enforce_pte_scan_limits) { ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); @@ -997,7 +707,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o } skip: ops->cleanup_area(p, hpage_pmd_size); - pop_settings(); + thp_pop_settings(); } static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) @@ -1028,7 +738,7 @@ out: static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); + int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"); void *p; p = ops->setup_area(1); @@ -1245,11 +955,11 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o fail("Fail"); ops->fault(p, 0, page_size); - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); + thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", p, 1, ops, true); - write_num("khugepaged/max_ptes_shared", - current_settings()->khugepaged.max_ptes_shared); + thp_write_num("khugepaged/max_ptes_shared", + thp_current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); ops->cleanup_area(p, hpage_pmd_size); @@ -1270,7 +980,7 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); + int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; @@ -1373,7 +1083,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c, static void usage(void) { - fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n"); + fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n"); fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n"); fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n"); fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n"); @@ -1382,15 +1092,34 @@ static void usage(void) fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\n\tSupported Options:\n"); + fprintf(stderr, "\t\t-h: This help message.\n"); + fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); exit(1); } -static void parse_test_type(int argc, const char **argv) +static void parse_test_type(int argc, char **argv) { + int opt; char *buf; const char *token; - if (argc == 1) { + while ((opt = getopt(argc, argv, "s:h")) != -1) { + switch (opt) { + case 's': + anon_order = atoi(optarg); + break; + case 'h': + default: + usage(); + } + } + + argv += optind; + argc -= optind; + + if (argc == 0) { /* Backwards compatibility */ khugepaged_context = &__khugepaged_context; madvise_context = &__madvise_context; @@ -1398,7 +1127,7 @@ static void parse_test_type(int argc, const char **argv) return; } - buf = strdup(argv[1]); + buf = strdup(argv[0]); token = strsep(&buf, ":"); if (!strcmp(token, "all")) { @@ -1432,13 +1161,16 @@ static void parse_test_type(int argc, const char **argv) if (!file_ops) return; - if (argc != 3) + if (argc != 2) usage(); + + get_finfo(argv[1]); } -int main(int argc, const char **argv) +int main(int argc, char **argv) { - struct settings default_settings = { + int hpage_pmd_order; + struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, .shmem_enabled = SHMEM_ADVISE, @@ -1460,9 +1192,6 @@ int main(int argc, const char **argv) parse_test_type(argc, argv); - if (file_ops) - get_finfo(argv[2]); - setbuf(stdout, NULL); page_size = getpagesize(); @@ -1472,14 +1201,17 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } hpage_pmd_nr = hpage_pmd_size / page_size; + hpage_pmd_order = __builtin_ctz(hpage_pmd_nr); default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; + default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; + default_settings.hugepages[anon_order].enabled = THP_ALWAYS; save_settings(); - push_settings(&default_settings); + thp_push_settings(&default_settings); alloc_at_fault(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 957b9e18c729..9b298f6a04b3 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -62,6 +62,9 @@ static void test_mlock_limit(int fd) char *mem; len = mlock_limit_cur; + if (len % page_size != 0) + len = (len/page_size) * page_size; + mem = mmap(NULL, len, prot, mode, fd, 0); if (mem == MAP_FAILED) { fail("unable to mmap secret memory\n"); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0161fb49fc6e..d59517ed3d48 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -36,6 +36,7 @@ int pagemap_fd; int uffd; int page_size; int hpage_size; +const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -94,19 +95,19 @@ int init_uffd(void) uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); if (uffd == -1) - ksft_exit_fail_msg("uffd syscall failed\n"); + return uffd; uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | UFFD_FEATURE_WP_HUGETLBFS_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) - ksft_exit_fail_msg("UFFDIO_API\n"); + return -1; if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api); + return -1; return 0; } @@ -1149,17 +1150,17 @@ int sanity_tests(void) munmap(mem, mem_size); /* 9. Memory mapped file */ - fd = open(__FILE__, O_RDONLY); + fd = open(progname, O_RDONLY); if (fd < 0) - ksft_exit_fail_msg("%s Memory mapped file\n"); + ksft_exit_fail_msg("%s Memory mapped file\n", __func__); - ret = stat(__FILE__, &sbuf); + ret = stat(progname, &sbuf); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); tmp_buf = malloc(sbuf.st_size); memcpy(tmp_buf, fmem, sbuf.st_size); @@ -1189,7 +1190,7 @@ int sanity_tests(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); @@ -1472,13 +1473,19 @@ static void transact_test(int page_size) extra_thread_faults); } -int main(void) +int main(int argc, char *argv[]) { int mem_size, shmid, buf_size, fd, i, ret; char *mem, *map, *fmem; struct stat sbuf; + progname = argv[0]; + ksft_print_header(); + + if (init_uffd()) + return ksft_exit_pass(); + ksft_set_plan(115); page_size = getpagesize(); @@ -1488,9 +1495,6 @@ int main(void) if (pagemap_fd < 0) return -EINVAL; - if (init_uffd()) - ksft_exit_fail_msg("uffd init failed\n"); - /* 1. Sanity testing */ sanity_tests_sd(); @@ -1595,7 +1599,7 @@ int main(void) fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, sbuf.st_size); wp_addr_range(fmem, sbuf.st_size); @@ -1623,7 +1627,7 @@ int main(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index cc16f6ca8533..246d53a5d7f2 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -5,6 +5,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +count_total=0 count_pass=0 count_fail=0 count_skip=0 @@ -17,6 +18,7 @@ usage: ${BASH_SOURCE[0]:-$0} [ options ] -a: run all tests, including extra ones -t: specify specific categories to tests to run -h: display this message + -n: disable TAP output The default behavior is to run required tests only. If -a is specified, will run all tests. @@ -77,12 +79,14 @@ EOF } RUN_ALL=false +TAP_PREFIX="# " -while getopts "aht:" OPT; do +while getopts "aht:n" OPT; do case ${OPT} in "a") RUN_ALL=true ;; "h") usage ;; "t") VM_SELFTEST_ITEMS=${OPTARG} ;; + "n") TAP_PREFIX= ;; esac done shift $((OPTIND -1)) @@ -184,30 +188,52 @@ fi VADDR64=0 echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1 +tap_prefix() { + sed -e "s/^/${TAP_PREFIX}/" +} + +tap_output() { + if [[ ! -z "$TAP_PREFIX" ]]; then + read str + echo $str + fi +} + +pretty_name() { + echo "$*" | sed -e 's/^\(bash \)\?\.\///' +} + # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local test=$(pretty_name "$*") local title="running $*" local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) - printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - "$@" - local ret=$? + ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) - echo "[PASS]" + echo "[PASS]" | tap_prefix + echo "ok ${count_total} ${test}" | tap_output elif [ $ret -eq $ksft_skip ]; then count_skip=$(( count_skip + 1 )) - echo "[SKIP]" + echo "[SKIP]" | tap_prefix + echo "ok ${count_total} ${test} # SKIP" | tap_output exitcode=$ksft_skip else count_fail=$(( count_fail + 1 )) - echo "[FAIL]" + echo "[FAIL]" | tap_prefix + echo "not ok ${count_total} ${test} # exit=$ret" | tap_output exitcode=1 fi fi # test_selected } +echo "TAP version 13" | tap_output + CATEGORY="hugetlb" run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) @@ -223,14 +249,17 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) # For this test, we need one and just one huge page echo 1 > /proc/sys/vm/nr_hugepages CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv +# Restore the previous number of huge pages, since further tests rely on it +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages if test_selected "hugetlb"; then - echo "NOTE: These hugetlb tests provide minimal coverage. Use" - echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" - echo " hugetlb regression testing." + echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix + echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" | tap_prefix + echo " hugetlb regression testing." | tap_prefix fi CATEGORY="mmap" run_test ./map_fixed_noreplace @@ -309,7 +338,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate -echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 @@ -331,8 +360,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests -run_test ./ksm_functional_tests - # protection_keys tests if [ -x ./protection_keys_32 ] then @@ -356,6 +383,8 @@ CATEGORY="cow" run_test ./cow CATEGORY="thp" run_test ./khugepaged +CATEGORY="thp" run_test ./khugepaged -s 2 + CATEGORY="thp" run_test ./transhuge-stress -d 20 CATEGORY="thp" run_test ./split_huge_page_test @@ -366,6 +395,7 @@ CATEGORY="mkdirty" run_test ./mkdirty CATEGORY="mdwe" run_test ./mdwe_test -echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" +echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix +echo "1..${count_total}" | tap_output exit $exitcode diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c new file mode 100644 index 000000000000..a4163438108e --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "thp_settings.h" + +#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" +#define MAX_SETTINGS_DEPTH 4 +static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; +static struct thp_settings saved_settings; +static char dev_queue_read_ahead_path[PATH_MAX]; + +static const char * const thp_enabled_strings[] = { + "never", + "always", + "inherit", + "madvise", + NULL +}; + +static const char * const thp_defrag_strings[] = { + "always", + "defer", + "defer+madvise", + "madvise", + "never", + NULL +}; + +static const char * const shmem_enabled_strings[] = { + "always", + "within_size", + "advise", + "never", + "deny", + "force", + NULL +}; + +int read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); + return 0; + } + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); + return 0; + } + + return (unsigned int) numwritten; +} + +const unsigned long read_num(const char *path) +{ + char buf[21]; + + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file()"); + exit(EXIT_FAILURE); + } + + return strtoul(buf, NULL, 10); +} + +void write_num(const char *path, unsigned long num) +{ + char buf[21]; + + sprintf(buf, "%ld", num); + if (!write_file(path, buf, strlen(buf) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +int thp_read_string(const char *name, const char * const strings[]) +{ + char path[PATH_MAX]; + char buf[256]; + char *c; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!read_file(path, buf, sizeof(buf))) { + perror(path); + exit(EXIT_FAILURE); + } + + c = strchr(buf, '['); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + + c++; + memmove(buf, c, sizeof(buf) - (c - buf)); + + c = strchr(buf, ']'); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + *c = '\0'; + + ret = 0; + while (strings[ret]) { + if (!strcmp(strings[ret], buf)) + return ret; + ret++; + } + + printf("Failed to parse %s\n", name); + exit(EXIT_FAILURE); +} + +void thp_write_string(const char *name, const char *val) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(path, val, strlen(val) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +const unsigned long thp_read_num(const char *name) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return read_num(path); +} + +void thp_write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + write_num(path, num); +} + +void thp_read_settings(struct thp_settings *settings) +{ + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int i; + + *settings = (struct thp_settings) { + .thp_enabled = thp_read_string("enabled", thp_enabled_strings), + .thp_defrag = thp_read_string("defrag", thp_defrag_strings), + .shmem_enabled = + thp_read_string("shmem_enabled", shmem_enabled_strings), + .use_zero_page = thp_read_num("use_zero_page"), + }; + settings->khugepaged = (struct khugepaged_settings) { + .defrag = thp_read_num("khugepaged/defrag"), + .alloc_sleep_millisecs = + thp_read_num("khugepaged/alloc_sleep_millisecs"), + .scan_sleep_millisecs = + thp_read_num("khugepaged/scan_sleep_millisecs"), + .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"), + .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"), + .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"), + .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"), + }; + if (dev_queue_read_ahead_path[0]) + settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) { + settings->hugepages[i].enabled = THP_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + settings->hugepages[i].enabled = + thp_read_string(path, thp_enabled_strings); + } +} + +void thp_write_settings(struct thp_settings *settings) +{ + struct khugepaged_settings *khugepaged = &settings->khugepaged; + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int enabled; + int i; + + thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); + thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); + thp_write_string("shmem_enabled", + shmem_enabled_strings[settings->shmem_enabled]); + thp_write_num("use_zero_page", settings->use_zero_page); + + thp_write_num("khugepaged/defrag", khugepaged->defrag); + thp_write_num("khugepaged/alloc_sleep_millisecs", + khugepaged->alloc_sleep_millisecs); + thp_write_num("khugepaged/scan_sleep_millisecs", + khugepaged->scan_sleep_millisecs); + thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); + thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); + thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); + thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (dev_queue_read_ahead_path[0]) + write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + enabled = settings->hugepages[i].enabled; + thp_write_string(path, thp_enabled_strings[enabled]); + } +} + +struct thp_settings *thp_current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +void thp_push_settings(struct thp_settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + thp_write_settings(thp_current_settings()); +} + +void thp_pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + thp_write_settings(thp_current_settings()); +} + +void thp_restore_settings(void) +{ + thp_write_settings(&saved_settings); +} + +void thp_save_settings(void) +{ + thp_read_settings(&saved_settings); +} + +void thp_set_read_ahead_path(char *path) +{ + if (!path) { + dev_queue_read_ahead_path[0] = '\0'; + return; + } + + strncpy(dev_queue_read_ahead_path, path, + sizeof(dev_queue_read_ahead_path)); + dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; +} + +unsigned long thp_supported_orders(void) +{ + unsigned long orders = 0; + char path[PATH_MAX]; + char buf[256]; + int ret; + int i; + + for (i = 0; i < NR_ORDERS; i++) { + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + ret = read_file(path, buf, sizeof(buf)); + if (ret) + orders |= 1UL << i; + } + + return orders; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h new file mode 100644 index 000000000000..71cbff05f4c7 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __THP_SETTINGS_H__ +#define __THP_SETTINGS_H__ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +enum thp_enabled { + THP_NEVER, + THP_ALWAYS, + THP_INHERIT, + THP_MADVISE, +}; + +enum thp_defrag { + THP_DEFRAG_ALWAYS, + THP_DEFRAG_DEFER, + THP_DEFRAG_DEFER_MADVISE, + THP_DEFRAG_MADVISE, + THP_DEFRAG_NEVER, +}; + +enum shmem_enabled { + SHMEM_ALWAYS, + SHMEM_WITHIN_SIZE, + SHMEM_ADVISE, + SHMEM_NEVER, + SHMEM_DENY, + SHMEM_FORCE, +}; + +#define NR_ORDERS 20 + +struct hugepages_settings { + enum thp_enabled enabled; +}; + +struct khugepaged_settings { + bool defrag; + unsigned int alloc_sleep_millisecs; + unsigned int scan_sleep_millisecs; + unsigned int max_ptes_none; + unsigned int max_ptes_swap; + unsigned int max_ptes_shared; + unsigned long pages_to_scan; +}; + +struct thp_settings { + enum thp_enabled thp_enabled; + enum thp_defrag thp_defrag; + enum shmem_enabled shmem_enabled; + bool use_zero_page; + struct khugepaged_settings khugepaged; + unsigned long read_ahead_kb; + struct hugepages_settings hugepages[NR_ORDERS]; +}; + +int read_file(const char *path, char *buf, size_t buflen); +int write_file(const char *path, const char *buf, size_t buflen); +const unsigned long read_num(const char *path); +void write_num(const char *path, unsigned long num); + +int thp_read_string(const char *name, const char * const strings[]); +void thp_write_string(const char *name, const char *val); +const unsigned long thp_read_num(const char *name); +void thp_write_num(const char *name, unsigned long num); + +void thp_write_settings(struct thp_settings *settings); +void thp_read_settings(struct thp_settings *settings); +struct thp_settings *thp_current_settings(void); +void thp_push_settings(struct thp_settings *settings); +void thp_pop_settings(void); +void thp_restore_settings(void); +void thp_save_settings(void); + +void thp_set_read_ahead_path(char *path); +unsigned long thp_supported_orders(void); + +#endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 16ed4dfa7359..622987f12c89 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -3,7 +3,8 @@ Before running this huge pages for each huge page size must have been reserved. - For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must + be used. Also shmmax must be increased. And you need to run as root to work around some weird permissions in shm. And nothing using huge pages should run in parallel. diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 02b89860e193..b0ac0ec2356d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -17,6 +17,7 @@ bool map_shared; bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; +uffd_test_case_ops_t *uffd_test_case_ops; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -262,7 +263,7 @@ static inline void munmap_area(void **area) *area = NULL; } -static void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(void) { size_t i; @@ -298,7 +299,11 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) unsigned long nr, cpu; int ret; - uffd_test_ctx_clear(); + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { + ret = uffd_test_case_ops->pre_alloc(errmsg); + if (ret) + return ret; + } ret = uffd_test_ops->allocate_area((void **)&area_src, true); ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); @@ -308,6 +313,12 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) return ret; } + if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { + ret = uffd_test_case_ops->post_alloc(errmsg); + if (ret) + return ret; + } + ret = userfaultfd_open(&features); if (ret) { if (errmsg) @@ -620,6 +631,30 @@ int copy_page(int ufd, unsigned long offset, bool wp) return __copy_page(ufd, offset, false, wp); } +int move_page(int ufd, unsigned long offset, unsigned long len) +{ + struct uffdio_move uffdio_move; + + if (offset + len > nr_pages * page_size) + err("unexpected offset %lu and length %lu\n", offset, len); + uffdio_move.dst = (unsigned long) area_dst + offset; + uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.len = len; + uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; + uffdio_move.move = 0; + if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + /* real retval in uffdio_move.move */ + if (uffdio_move.move != -EEXIST) + err("UFFDIO_MOVE error: %"PRId64, + (int64_t)uffdio_move.move); + wake_range(ufd, uffdio_move.dst, len); + } else if (uffdio_move.move != len) { + err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); + } else + return 1; + return 0; +} + int uffd_open_dev(unsigned int flags) { int fd, uffd; diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 7c4fa964c3b0..cb055282c89c 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -90,6 +90,12 @@ struct uffd_test_ops { }; typedef struct uffd_test_ops uffd_test_ops_t; +struct uffd_test_case_ops { + int (*pre_alloc)(const char **errmsg); + int (*post_alloc)(const char **errmsg); +}; +typedef struct uffd_test_case_ops uffd_test_case_ops_t; + extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; @@ -102,15 +108,18 @@ extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; +extern uffd_test_case_ops_t *uffd_test_case_ops; void uffd_stats_report(struct uffd_args *args, int n_cpus); int uffd_test_ctx_init(uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(void); int userfaultfd_open(uint64_t *features); int uffd_read_msg(int ufd, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); int copy_page(int ufd, unsigned long offset, bool wp); +int move_page(int ufd, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 469e0476af26..7e83829bbb33 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -323,8 +323,10 @@ static int userfaultfd_stress(void) uffd_stats_reset(args, nr_cpus); /* bounce pass */ - if (stress(args)) + if (stress(args)) { + uffd_test_ctx_clear(); return 1; + } /* Clear all the write protections if there is any */ if (test_uffdio_wp) @@ -354,6 +356,7 @@ static int userfaultfd_stress(void) uffd_stats_report(args, nr_cpus); } + uffd_test_ctx_clear(); return 0; } diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2709a34a39c5..cce90a10515a 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -23,6 +23,9 @@ #define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \ MEM_HUGETLB | MEM_HUGETLB_PRIVATE) +#define ALIGN_UP(x, align_to) \ + ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) + struct mem_type { const char *name; unsigned int mem_flag; @@ -78,6 +81,7 @@ typedef struct { uffd_test_fn uffd_fn; unsigned int mem_targets; uint64_t uffd_feature_required; + uffd_test_case_ops_t *test_case_ops; } uffd_test_case_t; static void uffd_test_report(void) @@ -185,6 +189,7 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, { map_shared = mem_type->shared; uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) page_size = default_huge_page_size(); @@ -1062,6 +1067,188 @@ static void uffd_poison_test(uffd_test_args_t *targs) uffd_test_pass(); } +static void +uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, + unsigned long len) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(len-1); + + if (move_page(uffd, offset, len)) + args->missing_faults++; +} + +static void uffd_move_handle_fault(struct uffd_msg *msg, + struct uffd_args *args) +{ + uffd_move_handle_fault_common(msg, args, page_size); +} + +static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, + struct uffd_args *args) +{ + uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); +} + +static void +uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, + void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +{ + unsigned long nr; + pthread_t uffd_mon; + char c; + unsigned long long count; + struct uffd_args args = { 0 }; + char *orig_area_src, *orig_area_dst; + unsigned long step_size, step_count; + unsigned long src_offs = 0; + unsigned long dst_offs = 0; + + /* Prevent source pages from being mapped more than once */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + err("madvise(MADV_DONTFORK) failure"); + + if (uffd_register(uffd, area_dst, nr_pages * page_size, + true, false, false)) + err("register failure"); + + args.handle_fault = handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + step_size = chunk_size / page_size; + step_count = nr_pages / step_size; + + if (chunk_size > page_size) { + char *aligned_src = ALIGN_UP(area_src, chunk_size); + char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + + if (aligned_src != area_src || aligned_dst != area_dst) { + src_offs = (aligned_src - area_src) / page_size; + dst_offs = (aligned_dst - area_dst) / page_size; + step_count--; + } + orig_area_src = area_src; + orig_area_dst = area_dst; + area_src = aligned_src; + area_dst = aligned_dst; + } + + /* + * Read each of the pages back using the UFFD-registered mapping. We + * expect that the first time we touch a page, it will result in a missing + * fault. uffd_poll_thread will resolve the fault by moving source + * page to destination. + */ + for (nr = 0; nr < step_count * step_size; nr += step_size) { + unsigned long i; + + /* Check area_src content */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != count_verify[src_offs + nr + i]) + err("nr %lu source memory invalid %llu %llu\n", + nr + i, count, count_verify[src_offs + nr + i]); + } + + /* Faulting into area_dst should move the page or the huge page */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_dst, nr + i); + if (count != count_verify[dst_offs + nr + i]) + err("nr %lu memory corruption %llu %llu\n", + nr, count, count_verify[dst_offs + nr + i]); + } + + /* Re-check area_src content which should be empty */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != 0) + err("nr %lu move failed %llu %llu\n", + nr, count, count_verify[src_offs + nr + i]); + } + } + if (step_size > page_size) { + area_src = orig_area_src; + area_dst = orig_area_dst; + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("join() failed"); + + if (args.missing_faults != step_count || args.minor_faults != 0) + uffd_test_fail("stats check error"); + else + uffd_test_pass(); +} + +static void uffd_move_test(uffd_test_args_t *targs) +{ + uffd_move_test_common(targs, page_size, uffd_move_handle_fault); +} + +static void uffd_move_pmd_test(uffd_test_args_t *targs) +{ + if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + err("madvise(MADV_HUGEPAGE) failure"); + uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_pmd_handle_fault); +} + +static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +{ + if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + err("madvise(MADV_NOHUGEPAGE) failure"); + uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_pmd_handle_fault); +} + +static int prevent_hugepages(const char **errmsg) +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ + if (errno != EINVAL) { + if (errmsg) + *errmsg = "madvise(MADV_NOHUGEPAGE) failed"; + return -errno; + } + } + return 0; +} + +static int request_hugepages(const char **errmsg) +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (errmsg) { + *errmsg = (errno == EINVAL) ? + "CONFIG_TRANSPARENT_HUGEPAGE is not set" : + "madvise(MADV_HUGEPAGE) failed"; + } + return -errno; + } + return 0; +} + +struct uffd_test_case_ops uffd_move_test_case_ops = { + .post_alloc = prevent_hugepages, +}; + +struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { + .post_alloc = request_hugepages, +}; + /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. @@ -1140,6 +1327,27 @@ uffd_test_case_t uffd_tests[] = { .uffd_feature_required = 0, }, { + .name = "move", + .uffd_fn = uffd_move_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_case_ops, + }, + { + .name = "move-pmd", + .uffd_fn = uffd_move_pmd_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_pmd_case_ops, + }, + { + .name = "move-pmd-split", + .uffd_fn = uffd_move_pmd_split_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_pmd_case_ops, + }, + { .name = "wp-fork", .uffd_fn = uffd_wp_fork_test, .mem_targets = MEM_ALL, @@ -1319,6 +1527,7 @@ int main(int argc, char *argv[]) continue; } test->uffd_fn(&args); + uffd_test_ctx_clear(); } } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 3082b40492dd..05736c615734 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -4,6 +4,7 @@ #include <dirent.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> +#include <linux/fs.h> #include <sys/syscall.h> #include <unistd.h> #include "../kselftest.h" @@ -28,19 +29,92 @@ uint64_t pagemap_get_entry(int fd, char *start) return entry; } +static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + psize()); + arg.vec = (uintptr_t)r; + arg.vec_len = 1; + arg.flags = 0; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = 0; + arg.category_inverted = 0; + arg.category_mask = 0; + arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; + arg.return_mask = arg.category_anyof_mask; + + return ioctl(fd, PAGEMAP_SCAN, &arg); +} + +static uint64_t pagemap_scan_get_categories(int fd, char *start) +{ + struct page_region r; + long ret; + + ret = __pagemap_scan_get_categories(fd, start, &r); + if (ret < 0) + ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); + if (ret == 0) + return 0; + return r.categories; +} + +/* `start` is any valid address. */ +static bool pagemap_scan_supported(int fd, char *start) +{ + static int supported = -1; + int ret; + + if (supported != -1) + return supported; + + /* Provide an invalid address in order to trigger EFAULT. */ + ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); + if (ret == 0) + ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); + + supported = errno == EFAULT; + + return supported; +} + +static bool page_entry_is(int fd, char *start, char *desc, + uint64_t pagemap_flags, uint64_t pagescan_flags) +{ + bool m = pagemap_get_entry(fd, start) & pagemap_flags; + + if (pagemap_scan_supported(fd, start)) { + bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; + + if (m == s) + return m; + + ksft_exit_fail_msg( + "read and ioctl return unmatched results for %s: %d %d", desc, m, s); + } + return m; +} + bool pagemap_is_softdirty(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; + return page_entry_is(fd, start, "soft-dirty", + PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); } bool pagemap_is_swapped(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SWAP; + return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); } bool pagemap_is_populated(int fd, char *start) { - return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); + return page_entry_is(fd, start, "populated", + PM_PRESENT | PM_SWAP, + PAGE_IS_PRESENT | PAGE_IS_SWAPPED); } unsigned long pagemap_get_pfn(int fd, char *start) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b2aca4c5f10..9e5bf59a20bf 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 5b88f7129fea..79a3dd75590e 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata, .msg_iov = &iov, .msg_iovlen = 1 }; - struct unix_diag_req *udr; struct nlmsghdr *nlh; int ret; diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..6ff3e732f449 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -416,9 +416,9 @@ int main(int argc, char *argv[]) { struct addrinfo hints, *ai; struct iovec iov[1]; + unsigned char *buf; struct msghdr msg; char cbuf[1024]; - char *buf; int err; int fd; diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 9a8229abfa02..be4a30a0d02a 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -2263,7 +2263,7 @@ static int check_results(void) int main(int argc, char **argv) { - unsigned int nr_process = 1; + long nr_process = 1; int route_sock = -1, ret = KSFT_SKIP; int test_desc_fd[2]; uint32_t route_seq; @@ -2284,7 +2284,7 @@ int main(int argc, char **argv) exit_usage(argv); } - if (nr_process > MAX_PROCESSES || !nr_process) { + if (nr_process > MAX_PROCESSES || nr_process < 1) { printk("nr_process should be between [1; %u]", MAX_PROCESSES); exit_usage(argv); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c7f9ebeebc2c..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -18,6 +18,7 @@ #include <sys/ioctl.h> #include <sys/poll.h> +#include <sys/random.h> #include <sys/sendfile.h> #include <sys/stat.h> #include <sys/socket.h> @@ -1125,15 +1126,11 @@ again: static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 8672d898f8cd..218aac467321 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -18,6 +18,7 @@ #include <time.h> #include <sys/ioctl.h> +#include <sys/random.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/wait.h> @@ -519,15 +520,11 @@ static int client(int unixfd) static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 75a2438efdf3..24a57b3ae215 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2776,7 +2776,7 @@ backup_tests() fi if reset "mpc backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2785,7 +2785,7 @@ backup_tests() fi if reset "mpc backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ @@ -2795,7 +2795,7 @@ backup_tests() fi if reset "mpc switch to backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2804,7 +2804,7 @@ backup_tests() fi if reset "mpc switch to backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ @@ -3240,7 +3240,7 @@ fastclose_tests() if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + chk_join_nr 0 0 0 0 0 0 1 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5f2b3f6c0d74..26827ea4e3e5 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -297,7 +297,7 @@ kci_test_addrlft() done sleep 5 - run_cmd_grep "10.23.11." ip addr show dev "$devdummy" + run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" if [ $? -eq 0 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" @@ -859,7 +859,7 @@ kci_test_gretap() run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - run_cmd ip -netns "$testns" link set dev $DEV_NS ups + run_cmd ip -netns "$testns" link set dev $DEV_NS up run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh new file mode 100755 index 000000000000..7bc804ffaf7c --- /dev/null +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +ret=0 + +cleanup() { + ip netns del $NETNS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +ip netns add ${NETNS} +ip netns exec ${NETNS} ip link add bond0 type bond mode 0 +ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 +ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 +ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off +ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 +ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 +ip netns exec ${NETNS} ip link set bond_slave_1 nomaster +ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + +exit $ret diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 7ea42fa02eab..c376151982c4 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -32,6 +32,8 @@ SUB_DIRS = alignment \ vphn \ math \ papr_attributes \ + papr_vpd \ + papr_sysparm \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/math/fpu.h b/tools/testing/selftests/powerpc/math/fpu.h new file mode 100644 index 000000000000..a8ad0d42604e --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2023, Michael Ellerman, IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_FPU_H +#define _SELFTESTS_POWERPC_FPU_H + +static inline void randomise_darray(double *darray, int num) +{ + long val; + + for (int i = 0; i < num; i++) { + val = random(); + if (val & 1) + val *= -1; + + if (val & 2) + darray[i] = 1.0 / val; + else + darray[i] = val * val; + } +} + +#endif /* _SELFTESTS_POWERPC_FPU_H */ diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S index 9dc0c158f871..efe1e1be4695 100644 --- a/tools/testing/selftests/powerpc/math/fpu_asm.S +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -66,6 +66,40 @@ FUNC_START(check_fpu) li r3,0 # Success!!! 1: blr + +// int check_all_fprs(double darray[32]) +FUNC_START(check_all_fprs) + PUSH_BASIC_STACK(8) + mr r4, r3 // r4 = darray + li r3, 1 // prepare for failure + + stfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // backup f31 + + // Check regs f0-f30, using f31 as scratch + .set i, 0 + .rept 31 + lfd f31, (8 * i)(r4) // load expected value + fcmpu cr0, i, f31 // compare + bne cr0, 1f // bail if mismatch + .set i, i + 1 + .endr + + lfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // reload f31 + stfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // backup f30 + + lfd f30, (8 * 31)(r4) // load expected value of f31 + fcmpu cr0, f30, f31 // compare + bne cr0, 1f // bail if mismatch + + lfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // reload f30 + + // Success + li r3, 0 + +1: POP_BASIC_STACK(8) + blr +FUNC_END(check_all_fprs) + FUNC_START(test_fpu) # r3 holds pointer to where to put the result of fork # r4 holds pointer to the pid @@ -75,8 +109,9 @@ FUNC_START(test_fpu) std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 + li r0,__NR_fork sc @@ -85,7 +120,7 @@ FUNC_START(test_fpu) std r3,0(r9) ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu + bl check_all_fprs nop POP_FPU(256) @@ -104,8 +139,8 @@ FUNC_START(preempt_fpu) std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting std r5,STACK_FRAME_PARAM(2)(sp) # int *running - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 sync # Atomic DEC @@ -116,8 +151,7 @@ FUNC_START(preempt_fpu) bne- 1b 2: ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu - nop + bl check_all_fprs cmpdi r3,0 bne 3f ld r4,STACK_FRAME_PARAM(2)(sp) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 5235bdc8c0b1..9ddede0770ed 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -1,13 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2015, Cyril Bur, IBM Corp. + * Copyright 2023, Michael Ellerman, IBM Corp. * * This test attempts to see if the FPU registers change across preemption. - * Two things should be noted here a) The check_fpu function in asm only checks - * the non volatile registers as it is reused from the syscall test b) There is - * no way to be sure preemption happened so this test just uses many threads - * and a long wait. As such, a successful test doesn't mean much but a failure - * is bad. + * There is no way to be sure preemption happened so this test just uses many + * threads and a long wait. As such, a successful test doesn't mean much but + * a failure is bad. */ #include <stdio.h> @@ -20,9 +19,10 @@ #include <pthread.h> #include "utils.h" +#include "fpu.h" /* Time to wait for workers to get preempted (seconds) */ -#define PREEMPT_TIME 20 +#define PREEMPT_TIME 60 /* * Factor by which to multiply number of online CPUs for total number of * worker threads @@ -30,26 +30,22 @@ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; int threads_starting; int running; -extern void preempt_fpu(double *darray, int *threads_starting, int *running); +extern int preempt_fpu(double *darray, int *threads_starting, int *running); void *preempt_fpu_c(void *p) { - int i; - srand(pthread_self()); - for (i = 0; i < 21; i++) - darray[i] = rand(); + long rc; - /* Test failed if it ever returns */ - preempt_fpu(darray, &threads_starting, &running); + srand(pthread_self()); + randomise_darray(darray, ARRAY_SIZE(darray)); + rc = preempt_fpu(darray, &threads_starting, &running); - return p; + return (void *)rc; } int test_preempt_fpu(void) diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c index 694f225c7e45..751d46b133fc 100644 --- a/tools/testing/selftests/powerpc/math/fpu_syscall.c +++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c @@ -14,12 +14,11 @@ #include <stdlib.h> #include "utils.h" +#include "fpu.h" extern int test_fpu(double *darray, pid_t *pid); -double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +double darray[32]; int syscall_fpu(void) { @@ -27,6 +26,9 @@ int syscall_fpu(void) int i; int ret; int child_ret; + + randomise_darray(darray, ARRAY_SIZE(darray)); + for (i = 0; i < 1000; i++) { /* test_fpu will fork() */ ret = test_fpu(darray, &fork_pid); diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c index 6761d6ce30ec..6f7cf400c687 100644 --- a/tools/testing/selftests/powerpc/math/vmx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -37,19 +37,21 @@ __thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, int threads_starting; int running; -extern void preempt_vmx(vector int *varray, int *threads_starting, int *running); +extern int preempt_vmx(vector int *varray, int *threads_starting, int *running); void *preempt_vmx_c(void *p) { int i, j; + long rc; + srand(pthread_self()); for (i = 0; i < 12; i++) for (j = 0; j < 4; j++) varray[i][j] = rand(); - /* Test fails if it ever returns */ - preempt_vmx(varray, &threads_starting, &running); - return p; + rc = preempt_vmx(varray, &threads_starting, &running); + + return (void *)rc; } int test_preempt_vmx(void) diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore new file mode 100644 index 000000000000..f2a69bf59d40 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore @@ -0,0 +1 @@ +/papr_sysparm diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile new file mode 100644 index 000000000000..7f79e437634a --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_sysparm + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c new file mode 100644 index 000000000000..f56c15a11e2f --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <asm/papr-sysparm.h> + +#include "utils.h" + +#define DEVPATH "/dev/papr-sysparm" + +static int open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_splpar(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 20, // SPLPAR characteristics + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0); + FAIL_IF(sp.length == 0); + FAIL_IF(sp.length > sizeof(sp.data)); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_bad_parameter(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = UINT32_MAX, // there are only ~60 specified parameters + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1); + FAIL_IF(errno != EOPNOTSUPP); + + // Ensure the buffer is unchanged + FAIL_IF(sp.length != 0); + for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i) + FAIL_IF(sp.data[i] != 0); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_common(unsigned long cmd) +{ + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, cmd, NULL) != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_get(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_GET); +} + +static int check_efault_set(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_SET); +} + +static int set_hmc0(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter + }; + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + FAIL_IF(errno != EPERM); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int set_with_ro_fd(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter. + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + + // HMC0 isn't a settable parameter and we would normally + // expect to get EPERM on attempts to modify it. However, when + // the file is open read-only, we expect the driver to prevent + // the attempt with a distinct error. + FAIL_IF(errno != EBADF); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +struct sysparm_test { + int (*function)(void); + const char *description; +}; + +static const struct sysparm_test sysparm_tests[] = { + { + .function = open_close, + .description = "open and close " DEVPATH " without issuing commands", + }, + { + .function = get_splpar, + .description = "retrieve SPLPAR characteristics", + }, + { + .function = get_bad_parameter, + .description = "verify EOPNOTSUPP for known-bad parameter", + }, + { + .function = check_efault_get, + .description = "PAPR_SYSPARM_IOC_GET returns EFAULT on bad address", + }, + { + .function = check_efault_set, + .description = "PAPR_SYSPARM_IOC_SET returns EFAULT on bad address", + }, + { + .function = set_hmc0, + .description = "ensure EPERM on attempt to update HMC0", + }, + { + .function = set_with_ro_fd, + .description = "PAPR_IOC_SYSPARM_SET returns EACCES on read-only fd", + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(sysparm_tests); ++i) { + const struct sysparm_test *t = &sysparm_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore b/tools/testing/selftests/powerpc/papr_vpd/.gitignore new file mode 100644 index 000000000000..49285031a656 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore @@ -0,0 +1 @@ +/papr_vpd diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile new file mode 100644 index 000000000000..06b719703bfd --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_vpd + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c new file mode 100644 index 000000000000..98cbb9109ee6 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <asm/papr-vpd.h> + +#include "utils.h" + +#define DEVPATH "/dev/papr-vpd" + +static int dev_papr_vpd_open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int dev_papr_vpd_get_handle_all(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + off_t size; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +static int dev_papr_vpd_get_handle_byte_at_a_time(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size_t consumed = 0; + while (1) { + ssize_t res; + char c; + + errno = 0; + res = read(fd, &c, sizeof(c)); + FAIL_IF(res > sizeof(c)); + FAIL_IF(res < 0); + FAIL_IF(errno != 0); + consumed += res; + if (res == 0) + break; + } + + FAIL_IF(consumed != lseek(fd, 0, SEEK_END)); + + FAIL_IF(close(fd)); + + return 0; +} + + +static int dev_papr_vpd_unterm_loc_code(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = {}; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + /* + * Place a non-null byte in every element of loc_code; the + * driver should reject this input. + */ + memset(lc.str, 'x', ARRAY_SIZE(lc.str)); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(fd != -1); + FAIL_IF(errno != EINVAL); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int dev_papr_vpd_null_handle(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + int rc; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + rc = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, NULL); + FAIL_IF(rc != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_close_handle_without_reading(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + /* close the handle without reading it */ + FAIL_IF(close(fd) != 0); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_reread(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + const off_t size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + char *bufs[2]; + + for (size_t i = 0; i < ARRAY_SIZE(bufs); ++i) { + bufs[i] = malloc(size); + FAIL_IF(!bufs[i]); + ssize_t consumed = pread(fd, bufs[i], size, 0); + FAIL_IF(consumed != size); + } + + FAIL_IF(memcmp(bufs[0], bufs[1], size)); + + FAIL_IF(close(fd) != 0); + + return 0; +} + +static int get_system_loc_code(struct papr_location_code *lc) +{ + static const char system_id_path[] = "/sys/firmware/devicetree/base/system-id"; + static const char model_path[] = "/sys/firmware/devicetree/base/model"; + char *system_id; + char *model; + int err = -1; + + if (read_file_alloc(model_path, &model, NULL)) + return err; + + if (read_file_alloc(system_id_path, &system_id, NULL)) + goto free_model; + + char *mtm; + int sscanf_ret = sscanf(model, "IBM,%ms", &mtm); + if (sscanf_ret != 1) + goto free_system_id; + + char *plant_and_seq; + if (sscanf(system_id, "IBM,%*c%*c%ms", &plant_and_seq) != 1) + goto free_mtm; + /* + * Replace - with . to build location code. + */ + char *sep = strchr(mtm, '-'); + if (!sep) + goto free_mtm; + else + *sep = '.'; + + snprintf(lc->str, sizeof(lc->str), + "U%s.%s", mtm, plant_and_seq); + err = 0; + + free(plant_and_seq); +free_mtm: + free(mtm); +free_system_id: + free(system_id); +free_model: + free(model); + return err; +} + +static int papr_vpd_system_loc_code(void) +{ + struct papr_location_code lc; + const int devfd = open(DEVPATH, O_RDONLY); + off_t size; + int fd; + + SKIP_IF_MSG(get_system_loc_code(&lc), + "Cannot determine system location code"); + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +struct vpd_test { + int (*function)(void); + const char *description; +}; + +static const struct vpd_test vpd_tests[] = { + { + .function = dev_papr_vpd_open_close, + .description = "open/close " DEVPATH, + }, + { + .function = dev_papr_vpd_unterm_loc_code, + .description = "ensure EINVAL on unterminated location code", + }, + { + .function = dev_papr_vpd_null_handle, + .description = "ensure EFAULT on bad handle addr", + }, + { + .function = dev_papr_vpd_get_handle_all, + .description = "get handle for all VPD" + }, + { + .function = papr_vpd_close_handle_without_reading, + .description = "close handle without consuming VPD" + }, + { + .function = dev_papr_vpd_get_handle_byte_at_a_time, + .description = "read all VPD one byte at a time" + }, + { + .function = papr_vpd_reread, + .description = "ensure re-read yields same results" + }, + { + .function = papr_vpd_system_loc_code, + .description = "get handle for system VPD" + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(vpd_tests); ++i) { + const struct vpd_test *t = &vpd_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 5b0e93f9996c..01fa816868bc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -353,11 +353,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define SOCK_BUF_SIZE (2 * 1024 * 1024) -#define MAX_MSG_SIZE (32 * 1024) +#define MAX_MSG_PAGES 4 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { unsigned long curr_hash; + size_t max_msg_size; int page_size; int msg_count; int fd; @@ -373,7 +374,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) curr_hash = 0; page_size = getpagesize(); - msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + max_msg_size = MAX_MSG_PAGES * page_size; + msg_count = SOCK_BUF_SIZE / max_msg_size; for (int i = 0; i < msg_count; i++) { size_t buf_size; @@ -383,7 +385,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Use "small" buffers and "big" buffers. */ if (i & 1) buf_size = page_size + - (rand() % (MAX_MSG_SIZE - page_size)); + (rand() % (max_msg_size - page_size)); else buf_size = 1 + (rand() % page_size); @@ -429,7 +431,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) unsigned long remote_hash; unsigned long curr_hash; int fd; - char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -457,8 +458,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) control_writeln("SRVREADY"); /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); - iov.iov_base = buf; - iov.iov_len = sizeof(buf); + iov.iov_len = MAX_MSG_PAGES * getpagesize(); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + perror("malloc"); + exit(EXIT_FAILURE); + } + msg.msg_iov = &iov; msg.msg_iovlen = 1; @@ -483,6 +489,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } + free(iov.iov_base); close(fd); remote_hash = control_readulong(); |