author		Muchun Song <songmuchun@bytedance.com>	2022-03-22 14:40:56 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-03-22 15:57:03 -0700
commit		88f2ef73fd66491a2f9a82373d22ca6540f23c62 (patch)
tree		01f07916f6ba0c16f768702ab8176c07c7b9d6c1 /mm/slub.c
parent		6a6b7b77cc0fdc13f50c66c219c8c05500a8dfce (diff)
mm: introduce kmem_cache_alloc_lru
We currently allocate scope for every memcg to be tracked on every
superblock instantiated in the system, regardless of whether that
superblock is even accessible to that memcg.
These huge memcg counts come from container hosts, where memcgs are
confined to just a small subset of the total number of superblocks
instantiated at any given point in time.
For these systems with huge container counts, list_lru does not need the
capability of tracking every memcg on every superblock. What it comes
down to is adding the memcg to the list_lru only at the first insert.
So introduce kmem_cache_alloc_lru to allocate an object and, if needed,
the memcg-aware part of its list_lru. In later patches, we will convert
all inode and dentry allocations from kmem_cache_alloc to
kmem_cache_alloc_lru.
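
As a caller-side illustration (not part of this patch; the real conversions
land in the follow-up patches of this series), a filesystem's inode
allocation that previously used kmem_cache_alloc() would pass its
superblock's s_inode_lru so that the per-memcg list_lru structures are set
up together with the object. The cache foo_inode_cachep and struct
foo_inode below are hypothetical names used only for the sketch:

/*
 * Hypothetical sketch: convert a filesystem's ->alloc_inode() from
 * kmem_cache_alloc() to kmem_cache_alloc_lru(). Passing the superblock's
 * s_inode_lru lets the allocator prepare the per-memcg part of that
 * list_lru before the inode is handed out; passing NULL keeps the old
 * kmem_cache_alloc() behaviour.
 */
static struct inode *foo_alloc_inode(struct super_block *sb)
{
	struct foo_inode *fi;

	fi = kmem_cache_alloc_lru(foo_inode_cachep, &sb->s_inode_lru,
				  GFP_KERNEL);
	if (!fi)
		return NULL;

	return &fi->vfs_inode;
}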
Link: https://lkml.kernel.org/r/20220228122126.37293-3-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Anna Schumaker <Anna.Schumaker@Netapp.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Fam Zheng <fam.zheng@bytedance.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kari Argillander <kari.argillander@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/slub.c')
-rw-r--r--	mm/slub.c	42
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 261474092e43..07cdd999c3fe 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3131,7 +3131,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
  *
  * Otherwise we can simply pick the next object from the lockless free list.
  */
-static __always_inline void *slab_alloc_node(struct kmem_cache *s,
+static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
 		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 {
 	void *object;
@@ -3141,7 +3141,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	struct obj_cgroup *objcg = NULL;
 	bool init = false;
 
-	s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
+	s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
 	if (!s)
 		return NULL;
 
@@ -3232,27 +3232,41 @@ out:
 	return object;
 }
 
-static __always_inline void *slab_alloc(struct kmem_cache *s,
+static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
 		gfp_t gfpflags, unsigned long addr, size_t orig_size)
 {
-	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
+	return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
 }
 
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+static __always_inline
+void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+			     gfp_t gfpflags)
 {
-	void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
+	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
 	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
 }
+
+void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return __kmem_cache_alloc_lru(s, NULL, gfpflags);
+}
 EXPORT_SYMBOL(kmem_cache_alloc);
 
+void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+			   gfp_t gfpflags)
+{
+	return __kmem_cache_alloc_lru(s, lru, gfpflags);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_lru);
+
 #ifdef CONFIG_TRACING
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
-	void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
+	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
 	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
@@ -3263,7 +3277,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
+	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
 	trace_kmem_cache_alloc_node(_RET_IP_, ret,
 				    s->object_size, s->size, gfpflags, node);
@@ -3277,7 +3291,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 				    gfp_t gfpflags,
 				    int node, size_t size)
 {
-	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
+	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
 	trace_kmalloc_node(_RET_IP_, ret,
 			   size, s->size, gfpflags, node);
@@ -3667,7 +3681,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	struct obj_cgroup *objcg = NULL;
 
 	/* memcg and kmem_cache debug support */
-	s = slab_pre_alloc_hook(s, &objcg, size, flags);
+	s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
 	if (unlikely(!s))
 		return false;
 	/*
@@ -4417,7 +4431,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc(s, flags, _RET_IP_, size);
+	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
 	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
 
@@ -4465,7 +4479,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
+	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
 	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
 
@@ -4923,7 +4937,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc(s, gfpflags, caller, size);
+	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
 	trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -4954,7 +4968,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc_node(s, gfpflags, node, caller, size);
+	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
 	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
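
For context, the new lru argument that slab_pre_alloc_hook() now takes is
consumed on the memcg side of this same commit (mm/slab.h and
mm/list_lru.c), which the mm/slub.c-only view above filters out. A
simplified, illustrative sketch of that consumer, not the literal hunk,
looks roughly like this (memcg_list_lru_alloc() is the per-memcg list_lru
allocation helper added elsewhere in this patch):

/*
 * Simplified sketch, not the exact code from mm/slab.h: when a list_lru is
 * supplied and the allocation is charged to a memcg, allocate the
 * per-memcg part of that list_lru up front, so that a later
 * list_lru_add() of the object always finds its memcg-aware list.
 */
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
					     struct list_lru *lru,
					     struct obj_cgroup **objcgp,
					     size_t objects, gfp_t flags)
{
	struct obj_cgroup *objcg;

	if (!memcg_kmem_enabled())
		return true;

	objcg = get_obj_cgroup_from_current();
	if (!objcg)
		return true;	/* allocation is not charged to a memcg */

	if (lru) {
		struct mem_cgroup *memcg = get_mem_cgroup_from_objcg(objcg);
		int ret = memcg_list_lru_alloc(lru, memcg, flags);

		css_put(&memcg->css);
		if (ret)
			goto out;
	}

	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
		goto out;

	*objcgp = objcg;
	return true;
out:
	obj_cgroup_put(objcg);
	return false;
}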