From 6cb917411e028dcb66ce8f5db1b47361b78d7d3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 29 Jan 2022 13:40:52 -0800 Subject: include/linux/sysctl.h: fix register_sysctl_mount_point() return type The CONFIG_SYSCTL=n stub returns the wrong type. Fixes: ee9efac48a082 ("sysctl: add helper to register a sysctl mount point") Reported-by: kernel test robot Acked-by: Luis Chamberlain Cc: Tong Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 180adf7da785..6353d6db69b2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -265,7 +265,7 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * return NULL; } -static inline struct sysctl_header *register_sysctl_mount_point(const char *path) +static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) { return NULL; } -- cgit v1.2.3 From e7f1e8834b2b2144dfbe0b2235d05e4f6f95882e Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sat, 29 Jan 2022 13:40:55 -0800 Subject: binfmt_misc: fix crash when load/unload module We should unregister the table upon module unload otherwise something horrible will happen when we load binfmt_misc module again. Also note that we should keep value returned by register_sysctl_mount_point() and release it later, otherwise it will leak. Also, per Christian's comment, to fully restore the old behavior that won't break userspace the check(binfmt_misc_header) should be eliminated. To reproduce: modprobe binfmt_misc modprobe -r binfmt_misc modprobe binfmt_misc modprobe -r binfmt_misc modprobe binfmt_misc resulting in modprobe: can't load module binfmt_misc (kernel/fs/binfmt_misc.ko): Cannot allocate memory and an unhappy kernel: binfmt_misc: Failed to create fs/binfmt_misc sysctl mount point binfmt_misc: Failed to create fs/binfmt_misc sysctl mount point BUG: unable to handle page fault for address: fffffbfff8004802 Call Trace: init_misc_binfmt+0x2d/0x1000 [binfmt_misc] Link: https://lkml.kernel.org/r/20220124181812.1869535-2-ztong0001@gmail.com Fixes: 3ba442d5331f ("fs: move binfmt_misc sysctl to its own file") Signed-off-by: Tong Zhang Co-developed-by: Christian Brauner Acked-by: Luis Chamberlain Cc: Eric Biederman Cc: Kees Cook Cc: Iurii Zaikin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index ddea6acbddde..c07f35719ee3 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -817,20 +817,20 @@ static struct file_system_type bm_fs_type = { }; MODULE_ALIAS_FS("binfmt_misc"); +static struct ctl_table_header *binfmt_misc_header; + static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) insert_binfmt(&misc_format); - if (!register_sysctl_mount_point("fs/binfmt_misc")) { - pr_warn("Failed to create fs/binfmt_misc sysctl mount point"); - return -ENOMEM; - } + binfmt_misc_header = register_sysctl_mount_point("fs/binfmt_misc"); return 0; } static void __exit exit_misc_binfmt(void) { + unregister_sysctl_table(binfmt_misc_header); unregister_binfmt(&misc_format); unregister_filesystem(&bm_fs_type); } -- cgit v1.2.3 From dbecf9b8b8ce580f4e11afed9d61e8aa294cddd2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 29 Jan 2022 13:40:58 -0800 Subject: ia64: make IA64_MCA_RECOVERY bool instead of tristate In linux-next, IA64_MCA_RECOVERY uses the (new) function make_task_dead(), which is not exported for use by modules. Instead of exporting it for one user, convert IA64_MCA_RECOVERY to be a bool Kconfig symbol. In a config file from "kernel test robot " for a different problem, this linker error was exposed when CONFIG_IA64_MCA_RECOVERY=m. Fixes this build error: ERROR: modpost: "make_task_dead" [arch/ia64/kernel/mca_recovery.ko] undefined! Link: https://lkml.kernel.org/r/20220124213129.29306-1-rdunlap@infradead.org Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: "Eric W. Biederman" Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 703952819e10..a7e01573abd8 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -318,7 +318,7 @@ config ARCH_PROC_KCORE_TEXT depends on PROC_KCORE config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." + bool "MCA recovery from errors other than TLB." config IA64_PALINFO tristate "/proc/pal support" -- cgit v1.2.3 From 61e28cf0543c7d8e6ef88c3c305f727c5a21ba5b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sat, 29 Jan 2022 13:41:01 -0800 Subject: memory-failure: fetch compound_head after pgmap_pfn_valid() memory_failure_dev_pagemap() at the moment assumes base pages (e.g. dax_lock_page()). For devmap with compound pages fetch the compound_head in case a tail page memory failure is being handled. Currently this is a nop, but in the advent of compound pages in dev_pagemap it allows memory_failure_dev_pagemap() to keep working. Without this fix memory-failure handling (i.e. MCEs on pmem) with device-dax configured namespaces will regress (and crash). Link: https://lkml.kernel.org/r/20211202204422.26777-2-joao.m.martins@oracle.com Reported-by: Jane Chu Signed-off-by: Joao Martins Reviewed-by: Naoya Horiguchi Reviewed-by: Dan Williams Reviewed-by: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 14ae5c18e776..97a9ed8f87a9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1595,6 +1595,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; } + /* + * Pages instantiated by device-dax (not filesystem-dax) + * may be compound pages. + */ + page = compound_head(page); + /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by -- cgit v1.2.3 From 536f4217ced62b671bd759f6b549621a5654a70f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 29 Jan 2022 13:41:04 -0800 Subject: mm: page->mapping folio->mapping should have the same offset As with the other members of folio, the offset of page->mapping and folio->mapping must be the same. The compile-time check was inadvertently removed during development. Add it back. [willy@infradead.org: changelog redo] Link: https://lkml.kernel.org/r/20220104011734.21714-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9db36dc5d4cf..5140e5feb486 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -261,6 +261,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio)); static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); +FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); -- cgit v1.2.3 From 0226bd64da52aa23120d1450c37a424387827a21 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sat, 29 Jan 2022 13:41:07 -0800 Subject: tools/testing/scatterlist: add missing defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cited commits replaced preemptible with pagefault_disabled and flush_kernel_dcache_page with flush_dcache_page respectively, hence need to update the corresponding defines in the test. scatterlist.c: In function ‘sg_miter_stop’: scatterlist.c:919:4: warning: implicit declaration of function ‘flush_dcache_page’ [-Wimplicit-function-declaration] flush_dcache_page(miter->page); ^~~~~~~~~~~~~~~~~ In file included from linux/scatterlist.h:8:0, from scatterlist.c:9: scatterlist.c:922:18: warning: implicit declaration of function ‘pagefault_disabled’ [-Wimplicit-function-declaration] WARN_ON_ONCE(!pagefault_disabled()); ^ linux/mm.h:23:25: note: in definition of macro ‘WARN_ON_ONCE’ int __ret_warn_on = !!(condition); \ ^~~~~~~~~ Link: https://lkml.kernel.org/r/20220118082105.1737320-1-maorg@nvidia.com Fixes: 723aca208516 ("mm/scatterlist: replace the !preemptible warning in sg_miter_stop()") Fixes: 0e84f5dbf8d6 ("scatterlist: replace flush_kernel_dcache_page with flush_dcache_page") Signed-off-by: Maor Gottlieb Tested-by: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/scatterlist/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 16ec895bbe5f..5bd9e6e80625 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page) __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) -#define preemptible() (1) +#define pagefault_disabled() (0) static inline void *kmap(struct page *page) { @@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) +#define flush_dcache_page(p) #define MAX_ERRNO 4095 -- cgit v1.2.3 From 09c6304e38e440b93a9ebf3f3cf75cd6cb529f91 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Sat, 29 Jan 2022 13:41:11 -0800 Subject: kasan: test: fix compatibility with FORTIFY_SOURCE With CONFIG_FORTIFY_SOURCE enabled, string functions will also perform dynamic checks using __builtin_object_size(ptr), which when failed will panic the kernel. Because the KASAN test deliberately performs out-of-bounds operations, the kernel panics with FORTIFY_SOURCE, for example: | kernel BUG at lib/string_helpers.c:910! | invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI | CPU: 1 PID: 137 Comm: kunit_try_catch Tainted: G B 5.16.0-rc3+ #3 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 | RIP: 0010:fortify_panic+0x19/0x1b | ... | Call Trace: | kmalloc_oob_in_memset.cold+0x16/0x16 | ... Fix it by also hiding `ptr` from the optimizer, which will ensure that __builtin_object_size() does not return a valid size, preventing fortified string functions from panicking. Link: https://lkml.kernel.org/r/20220124160744.1244685-1-elver@google.com Signed-off-by: Marco Elver Reported-by: Nico Pache Reviewed-by: Nico Pache Reviewed-by: Andrey Konovalov Reviewed-by: Kees Cook Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 847cdbefab46..26a5c9007653 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -492,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + KASAN_GRANULE_SIZE)); @@ -515,6 +516,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(invalid_size); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); @@ -531,6 +533,7 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -893,6 +896,7 @@ static void kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = memchr(ptr, '1', size + 1)); @@ -919,6 +923,7 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = memcmp(ptr, arr, size+1)); -- cgit v1.2.3 From 27fe73394a1c6d0b07fa4d95f1bca116d1cc66e9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sat, 29 Jan 2022 13:41:14 -0800 Subject: mm, kasan: use compare-exchange operation to set KASAN page tag It has been reported that the tag setting operation on newly-allocated pages can cause the page flags to be corrupted when performed concurrently with other flag updates as a result of the use of non-atomic operations. Fix the problem by using a compare-exchange loop to update the tag. Link: https://lkml.kernel.org/r/20220120020148.1632253-1-pcc@google.com Link: https://linux-review.googlesource.com/id/I456b24a2b9067d93968d43b4bb3351c0cec63101 Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e1a84b1e6787..213cc569b192 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1506,11 +1506,18 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { - if (kasan_enabled()) { - tag ^= 0xff; - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } + unsigned long old_flags, flags; + + if (!kasan_enabled()) + return; + + tag ^= 0xff; + old_flags = READ_ONCE(page->flags); + do { + flags = old_flags; + flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) -- cgit v1.2.3 From 51e50fbd3efc6064c30ed73a5e009018b36e290a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 29 Jan 2022 13:41:17 -0800 Subject: psi: fix "no previous prototype" warnings when CONFIG_CGROUPS=n When CONFIG_CGROUPS is disabled psi code generates the following warnings: kernel/sched/psi.c:1112:21: warning: no previous prototype for 'psi_trigger_create' [-Wmissing-prototypes] 1112 | struct psi_trigger *psi_trigger_create(struct psi_group *group, | ^~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1182:6: warning: no previous prototype for 'psi_trigger_destroy' [-Wmissing-prototypes] 1182 | void psi_trigger_destroy(struct psi_trigger *t) | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1249:10: warning: no previous prototype for 'psi_trigger_poll' [-Wmissing-prototypes] 1249 | __poll_t psi_trigger_poll(void **trigger_ptr, | ^~~~~~~~~~~~~~~~ Change the declarations of these functions in the header to provide the prototypes even when they are unused. Link: https://lkml.kernel.org/r/20220119223940.787748-2-surenb@google.com Fixes: 0e94682b73bf ("psi: introduce psi monitor") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/psi.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/psi.h b/include/linux/psi.h index f8ce53bfdb2a..7f7d1d88c3bb 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ -- cgit v1.2.3 From 44585f7bc0cb01095bc2ad4258049c02bbad21ef Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 29 Jan 2022 13:41:20 -0800 Subject: psi: fix "defined but not used" warnings when CONFIG_PROC_FS=n When CONFIG_PROC_FS is disabled psi code generates the following warnings: kernel/sched/psi.c:1364:30: warning: 'psi_cpu_proc_ops' defined but not used [-Wunused-const-variable=] 1364 | static const struct proc_ops psi_cpu_proc_ops = { | ^~~~~~~~~~~~~~~~ kernel/sched/psi.c:1355:30: warning: 'psi_memory_proc_ops' defined but not used [-Wunused-const-variable=] 1355 | static const struct proc_ops psi_memory_proc_ops = { | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1346:30: warning: 'psi_io_proc_ops' defined but not used [-Wunused-const-variable=] 1346 | static const struct proc_ops psi_io_proc_ops = { | ^~~~~~~~~~~~~~~ Make definitions of these structures and related functions conditional on CONFIG_PROC_FS config. Link: https://lkml.kernel.org/r/20220119223940.787748-3-surenb@google.com Fixes: 0e94682b73bf ("psi: introduce psi monitor") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 79 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index c137c4d6983e..e14358178849 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1082,44 +1082,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) return 0; } -static int psi_io_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_IO); -} - -static int psi_memory_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_MEM); -} - -static int psi_cpu_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_CPU); -} - -static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) -{ - if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - return single_open(file, psi_show, NULL); -} - -static int psi_io_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_io_show); -} - -static int psi_memory_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_memory_show); -} - -static int psi_cpu_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_cpu_show); -} - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res) { @@ -1278,6 +1240,45 @@ __poll_t psi_trigger_poll(void **trigger_ptr, return ret; } +#ifdef CONFIG_PROC_FS +static int psi_io_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_IO); +} + +static int psi_memory_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_MEM); +} + +static int psi_cpu_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_CPU); +} + +static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) +{ + if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + return single_open(file, psi_show, NULL); +} + +static int psi_io_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_io_show); +} + +static int psi_memory_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_memory_show); +} + +static int psi_cpu_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_cpu_show); +} + static ssize_t psi_write(struct file *file, const char __user *user_buf, size_t nbytes, enum psi_res res) { @@ -1392,3 +1393,5 @@ static int __init psi_proc_init(void) return 0; } module_init(psi_proc_init); + +#endif /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 4cd1103d8c66b2cdb7e64385c274edb0ac5e8887 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Sat, 29 Jan 2022 13:41:23 -0800 Subject: jbd2: export jbd2_journal_[grab|put]_journal_head Patch series "ocfs2: fix a deadlock case". This fixes a deadlock case in ocfs2. We firstly export jbd2 symbols jbd2_journal_[grab|put]_journal_head as preparation and later use them in ocfs2 insread of jbd_[lock|unlock]_bh_journal_head to fix the deadlock. This patch (of 2): This exports symbols jbd2_journal_[grab|put]_journal_head, which will be used outside modules, e.g. ocfs2. Link: https://lkml.kernel.org/r/20220121071205.100648-2-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Andreas Dilger Cc: Gautham Ananthakrishna Cc: Saeed Mirzamohammadi Cc: "Theodore Ts'o" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f13d548e4a7f..bf108d4493d2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2972,6 +2972,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { @@ -3024,6 +3025,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_unlock_bh_journal_head(bh); } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head -- cgit v1.2.3 From ddf4b773aa40790dfa936bd845c18e735a49c61c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Sat, 29 Jan 2022 13:41:27 -0800 Subject: ocfs2: fix a deadlock when commit trans commit 6f1b228529ae introduces a regression which can deadlock as follows: Task1: Task2: jbd2_journal_commit_transaction ocfs2_test_bg_bit_allocatable spin_lock(&jh->b_state_lock) jbd_lock_bh_journal_head __jbd2_journal_remove_checkpoint spin_lock(&jh->b_state_lock) jbd2_journal_put_journal_head jbd_lock_bh_journal_head Task1 and Task2 lock bh->b_state and jh->b_state_lock in different order, which finally result in a deadlock. So use jbd2_journal_[grab|put]_journal_head instead in ocfs2_test_bg_bit_allocatable() to fix it. Link: https://lkml.kernel.org/r/20220121071205.100648-3-joseph.qi@linux.alibaba.com Fixes: 6f1b228529ae ("ocfs2: fix race between searching chunks and release journal_head from buffer_head") Signed-off-by: Joseph Qi Reported-by: Gautham Ananthakrishna Tested-by: Gautham Ananthakrishna Reported-by: Saeed Mirzamohammadi Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 481017e1dac5..166c8918c825 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret = 1; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh)) + jh = jbd2_journal_grab_journal_head(bg_bh); + if (!jh) return 1; - jbd_lock_bh_journal_head(bg_bh); - if (buffer_jbd(bg_bh)) { - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); - } - jbd_unlock_bh_journal_head(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); return ret; } -- cgit v1.2.3