diff options
Diffstat (limited to 'kernel/bpf/memalloc.c')
-rw-r--r-- | kernel/bpf/memalloc.c | 200 |
1 files changed, 94 insertions, 106 deletions
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 63b909d277d4..550f02e2cb13 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -121,6 +121,8 @@ struct bpf_mem_caches { struct bpf_mem_cache cache[NUM_CACHES]; }; +static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + static struct llist_node notrace *__llist_del_first(struct llist_head *head) { struct llist_node *entry, *next; @@ -462,11 +464,17 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c) * consume ~ 11 Kbyte per cpu. * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. + * + * Percpu allocation is typically rare. To avoid potential unnecessary large + * memory consumption, set low_mark = 1 and high_mark = 3, resulting in c->batch = 1. */ static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); - if (c->unit_size <= 256) { + if (c->percpu_size) { + c->low_watermark = 1; + c->high_watermark = 3; + } else if (c->unit_size <= 256) { c->low_watermark = 32; c->high_watermark = 96; } else { @@ -483,32 +491,16 @@ static void init_refill_work(struct bpf_mem_cache *c) static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) { - /* To avoid consuming memory assume that 1st run of bpf - * prog won't be doing more than 4 map_update_elem from - * irq disabled region - */ - alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); -} - -static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) -{ - struct llist_node *first; - unsigned int obj_size; + int cnt = 1; - first = c->free_llist.first; - if (!first) - return 0; - - if (c->percpu_size) - obj_size = pcpu_alloc_size(((void **)first)[1]); - else - obj_size = ksize(first); - if (obj_size != c->unit_size) { - WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n", - idx, c->percpu_size, obj_size, c->unit_size); - return -EINVAL; - } - return 0; + /* To avoid consuming memory, for non-percpu allocation, assume that + * 1st run of bpf prog won't be doing more than 4 map_update_elem from + * irq disabled region if unit size is less than or equal to 256. + * For all other cases, let us just do one allocation. + */ + if (!c->percpu_size && c->unit_size <= 256) + cnt = 4; + alloc_bulk(c, cnt, cpu_to_node(cpu), false); } /* When size != 0 bpf_mem_cache for each cpu. @@ -520,11 +512,13 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { - static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; - int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; + int cpu, i, unit_size, percpu_size = 0; + + if (percpu && size == 0) + return -EINVAL; /* room for llist_node and per-cpu pointer */ if (percpu) @@ -544,6 +538,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) if (memcg_bpf_enabled()) objcg = get_obj_cgroup_from_current(); #endif + ma->objcg = objcg; + for_each_possible_cpu(cpu) { c = per_cpu_ptr(pc, cpu); c->unit_size = unit_size; @@ -560,10 +556,10 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; - err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif + ma->objcg = objcg; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); for (i = 0; i < NUM_CACHES; i++) { @@ -574,28 +570,62 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->tgt = c; init_refill_work(c); - /* Another bpf_mem_cache will be used when allocating - * c->unit_size in bpf_mem_alloc(), so doesn't prefill - * for the bpf_mem_cache because these free objects will - * never be used. - */ - if (i != bpf_mem_cache_idx(c->unit_size)) - continue; prefill_mem_cache(c, cpu); - err = check_obj_size(c, i); - if (err) - goto out; } } -out: ma->caches = pcc; - /* refill_work is either zeroed or initialized, so it is safe to - * call irq_work_sync(). - */ - if (err) - bpf_mem_alloc_destroy(ma); - return err; + return 0; +} + +int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg) +{ + struct bpf_mem_caches __percpu *pcc; + + pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL); + if (!pcc) + return -ENOMEM; + + ma->caches = pcc; + ma->objcg = objcg; + ma->percpu = true; + return 0; +} + +int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size) +{ + struct bpf_mem_caches *cc, __percpu *pcc; + int cpu, i, unit_size, percpu_size; + struct obj_cgroup *objcg; + struct bpf_mem_cache *c; + + i = bpf_mem_cache_idx(size); + if (i < 0) + return -EINVAL; + + /* room for llist_node and per-cpu pointer */ + percpu_size = LLIST_NODE_SZ + sizeof(void *); + + unit_size = sizes[i]; + objcg = ma->objcg; + pcc = ma->caches; + + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(pcc, cpu); + c = &cc->cache[i]; + if (c->unit_size) + break; + + c->unit_size = unit_size; + c->objcg = objcg; + c->percpu_size = percpu_size; + c->tgt = c; + + init_refill_work(c); + prefill_mem_cache(c, cpu); + } + + return 0; } static void drain_mem_cache(struct bpf_mem_cache *c) @@ -729,9 +759,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } - /* objcg is the same across cpus */ - if (c->objcg) - obj_cgroup_put(c->objcg); + if (ma->objcg) + obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } if (ma->caches) { @@ -747,8 +776,8 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } } - if (c->objcg) - obj_cgroup_put(c->objcg); + if (ma->objcg) + obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } } @@ -869,9 +898,11 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) void *ret; if (!size) - return ZERO_SIZE_PTR; + return NULL; - idx = bpf_mem_cache_idx(size + LLIST_NODE_SZ); + if (!ma->percpu) + size += LLIST_NODE_SZ; + idx = bpf_mem_cache_idx(size); if (idx < 0) return NULL; @@ -879,26 +910,17 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) return !ret ? NULL : ret + LLIST_NODE_SZ; } -static notrace int bpf_mem_free_idx(void *ptr, bool percpu) -{ - size_t size; - - if (percpu) - size = pcpu_alloc_size(*((void **)ptr)); - else - size = ksize(ptr - LLIST_NODE_SZ); - return bpf_mem_cache_idx(size); -} - void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) { + struct bpf_mem_cache *c; int idx; if (!ptr) return; - idx = bpf_mem_free_idx(ptr, ma->percpu); - if (idx < 0) + c = *(void **)(ptr - LLIST_NODE_SZ); + idx = bpf_mem_cache_idx(c->unit_size); + if (WARN_ON_ONCE(idx < 0)) return; unit_free(this_cpu_ptr(ma->caches)->cache + idx, ptr); @@ -906,13 +928,15 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { + struct bpf_mem_cache *c; int idx; if (!ptr) return; - idx = bpf_mem_free_idx(ptr, ma->percpu); - if (idx < 0) + c = *(void **)(ptr - LLIST_NODE_SZ); + idx = bpf_mem_cache_idx(c->unit_size); + if (WARN_ON_ONCE(idx < 0)) return; unit_free_rcu(this_cpu_ptr(ma->caches)->cache + idx, ptr); @@ -978,47 +1002,11 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); + if (ret) + *(struct bpf_mem_cache **)ret = c; set_active_memcg(old_memcg); mem_cgroup_put(memcg); } return !ret ? NULL : ret + LLIST_NODE_SZ; } - -/* The alignment of dynamic per-cpu area is 8, so c->unit_size and the - * actual size of dynamic per-cpu area will always be matched and there is - * no need to adjust size_index for per-cpu allocation. However for the - * simplicity of the implementation, use an unified size_index for both - * kmalloc and per-cpu allocation. - */ -static __init int bpf_mem_cache_adjust_size(void) -{ - unsigned int size; - - /* Adjusting the indexes in size_index() according to the object_size - * of underlying slab cache, so bpf_mem_alloc() will select a - * bpf_mem_cache with unit_size equal to the object_size of - * the underlying slab cache. - * - * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is - * 256-bytes, so only do adjustment for [8-bytes, 192-bytes]. - */ - for (size = 192; size >= 8; size -= 8) { - unsigned int kmalloc_size, index; - - kmalloc_size = kmalloc_size_roundup(size); - if (kmalloc_size == size) - continue; - - if (kmalloc_size <= 192) - index = size_index[(kmalloc_size - 1) / 8]; - else - index = fls(kmalloc_size - 1) - 1; - /* Only overwrite if necessary */ - if (size_index[(size - 1) / 8] != index) - size_index[(size - 1) / 8] = index; - } - - return 0; -} -subsys_initcall(bpf_mem_cache_adjust_size); |