From d5dfc80f80dbb3bf94e5e9efa694670ea78cd84d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 26 Jul 2016 15:21:08 -0700 Subject: dma-debug: track bucket lock state for static checkers get_hash_bucket() and put_hash_bucket() acquire and release the same spinlock, but this confuses static checkers such as sparse lib/dma-debug.c:254:27: warning: context imbalance in 'get_hash_bucket' - wrong count at exit lib/dma-debug.c:268:13: warning: context imbalance in 'put_hash_bucket' - unexpected unlock Add the appropriate acquire and release statements so that checkers can properly track the lock state. Link: http://lkml.kernel.org/r/20160701191552.24295-1-sboyd@codeaurora.org Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 51a76af25c66..fcfa1939ac41 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -253,6 +253,7 @@ static int hash_fn(struct dma_debug_entry *entry) */ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, unsigned long *flags) + __acquires(&dma_entry_hash[idx].lock) { int idx = hash_fn(entry); unsigned long __flags; @@ -267,6 +268,7 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, */ static void put_hash_bucket(struct hash_bucket *bucket, unsigned long *flags) + __releases(&bucket->lock) { unsigned long __flags = *flags; -- cgit v1.2.3 From f2ca0b55710752588ccff5224a11e6aea43a996a Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 26 Jul 2016 15:23:55 -0700 Subject: mm/page_owner: use stackdepot to store stacktrace Currently, we store each page's allocation stacktrace on corresponding page_ext structure and it requires a lot of memory. This causes the problem that memory tight system doesn't work well if page_owner is enabled. 
Moreover, even with this large memory consumption, we cannot get the full stacktrace because we allocate memory at boot time and just maintain 8 stacktrace slots to balance memory consumption. We could increase it to more but it would make the system unusable or change system behaviour. To solve the problem, this patch uses stackdepot to store the stacktrace. It obviously provides memory saving but there is a drawback that stackdepot could fail. stackdepot allocates memory at runtime so it could fail if the system does not have enough memory. But most allocation stacks are generated very early, and there is plenty of memory at that time. So, failure would not happen easily. And, one failure means that we miss just one page's allocation stacktrace so it would not be a big problem. In this patch, when memory allocation failure happens, we store a special stacktrace handle to the page whose stacktrace failed to be saved. With it, the user can guess memory usage properly even if failure happens. Memory saving looks as follows. (4GB memory system with page_owner) (before the patch -> after the patch) static allocation: 92274688 bytes -> 25165824 bytes dynamic allocation after boot + kernel build: 0 bytes -> 327680 bytes total: 92274688 bytes -> 25493504 bytes 72% reduction in total. Note that the implementation looks more complex than one would imagine because there is a recursion issue. stackdepot uses the page allocator and page_owner is called at page allocation. Using stackdepot in page_owner could re-call the page allocator and then page_owner. That is a recursion. To detect and avoid it, whenever we obtain a stacktrace, recursion is checked and page_owner is set to dummy information if found. Dummy information means that this page is allocated for the page_owner feature itself (such as stackdepot) and it's understandable behavior for the user. 
[iamjoonsoo.kim@lge.com: mm-page_owner-use-stackdepot-to-store-stacktrace-v3] Link: http://lkml.kernel.org/r/1464230275-25791-6-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1466150259-27727-7-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1464230275-25791-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mel Gorman Cc: Minchan Kim Cc: Alexander Potapenko Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 805b7048a1bd..f07842e2d69f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -244,6 +244,7 @@ config PAGE_OWNER depends on DEBUG_KERNEL && STACKTRACE_SUPPORT select DEBUG_FS select STACKTRACE + select STACKDEPOT select PAGE_EXTENSION help This keeps track of what call chain is the owner of a page, may -- cgit v1.2.3 From c78c66d1ddfdbd2353f3fcfeba0268524537b096 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 26 Jul 2016 15:26:02 -0700 Subject: radix-tree: implement radix_tree_maybe_preload_order() The new helper is similar to radix_tree_maybe_preload(), but tries to preload number of nodes required to insert (1 << order) continuous naturally-aligned elements. This is required to push huge pages into pagecache. Link: http://lkml.kernel.org/r/1466021202-61880-24-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8b7d8459bb9d..61b8fb529cef 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -38,6 +38,9 @@ #include /* in_interrupt() */ +/* Number of nodes in fully populated tree of given height */ +static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; + /* * Radix tree node cache. */ @@ -342,7 +345,7 @@ radix_tree_node_free(struct radix_tree_node *node) * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ -static int __radix_tree_preload(gfp_t gfp_mask) +static int __radix_tree_preload(gfp_t gfp_mask, int nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; @@ -350,14 +353,14 @@ static int __radix_tree_preload(gfp_t gfp_mask) preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { + while (rtp->nr < nr) { preempt_enable(); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { + if (rtp->nr < nr) { node->private_data = rtp->nodes; rtp->nodes = node; rtp->nr++; @@ -383,7 +386,7 @@ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... 
*/ WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); - return __radix_tree_preload(gfp_mask); + return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); } EXPORT_SYMBOL(radix_tree_preload); @@ -395,13 +398,58 @@ EXPORT_SYMBOL(radix_tree_preload); int radix_tree_maybe_preload(gfp_t gfp_mask) { if (gfpflags_allow_blocking(gfp_mask)) - return __radix_tree_preload(gfp_mask); + return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); /* Preloading doesn't help anything with this gfp mask, skip it */ preempt_disable(); return 0; } EXPORT_SYMBOL(radix_tree_maybe_preload); +/* + * The same as function above, but preload number of nodes required to insert + * (1 << order) continuous naturally-aligned elements. + */ +int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) +{ + unsigned long nr_subtrees; + int nr_nodes, subtree_height; + + /* Preloading doesn't help anything with this gfp mask, skip it */ + if (!gfpflags_allow_blocking(gfp_mask)) { + preempt_disable(); + return 0; + } + + /* + * Calculate number and height of fully populated subtrees it takes to + * store (1 << order) elements. + */ + nr_subtrees = 1 << order; + for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE; + subtree_height++) + nr_subtrees >>= RADIX_TREE_MAP_SHIFT; + + /* + * The worst case is zero height tree with a single item at index 0 and + * then inserting items starting at ULONG_MAX - (1 << order). + * + * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to + * 0-index item. + */ + nr_nodes = RADIX_TREE_MAX_PATH; + + /* Plus branch to fully populated subtrees. */ + nr_nodes += RADIX_TREE_MAX_PATH - subtree_height; + + /* Root node is shared. */ + nr_nodes--; + + /* Plus nodes required to build subtrees. 
*/ + nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height]; + + return __radix_tree_preload(gfp_mask, nr_nodes); +} + /* * The maximum index which can be stored in a radix tree */ @@ -1571,6 +1619,31 @@ radix_tree_node_ctor(void *arg) INIT_LIST_HEAD(&node->private_list); } +static __init unsigned long __maxindex(unsigned int height) +{ + unsigned int width = height * RADIX_TREE_MAP_SHIFT; + int shift = RADIX_TREE_INDEX_BITS - width; + + if (shift < 0) + return ~0UL; + if (shift >= BITS_PER_LONG) + return 0UL; + return ~0UL >> shift; +} + +static __init void radix_tree_init_maxnodes(void) +{ + unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); + for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { + for (j = i; j > 0; j--) + height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; + } +} + static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1597,5 +1670,6 @@ void __init radix_tree_init(void) sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); + radix_tree_init_maxnodes(); hotcpu_notifier(radix_tree_callback, 0); } -- cgit v1.2.3