summary | refs | log | tree | commit | diff | stats
path: root/mm/memtest.c
diff options
context:
space:
mode:
author: Tomas Mudrunka <tomas.mudrunka@gmail.com> 2023-03-21 11:34:30 +0100
committer: Andrew Morton <akpm@linux-foundation.org> 2023-04-05 19:42:55 -0700
commit: bd23024b9774e681cbe6cc3afcb24244dfcb2390 (patch)
tree: 660d52ca5ef5b776a2299b5a189add72d34c39c9 /mm/memtest.c
parent: c9bb52738b39fabc8b6b9446f0d194eedb3e5a10 (diff)
download: linux-bd23024b9774e681cbe6cc3afcb24244dfcb2390.tar.gz
linux-bd23024b9774e681cbe6cc3afcb24244dfcb2390.tar.bz2
linux-bd23024b9774e681cbe6cc3afcb24244dfcb2390.zip
mm/memtest: add results of early memtest to /proc/meminfo
Currently the memtest results are only presented in dmesg.

When running a large fleet of devices without ECC RAM it's currently not easy to do bulk monitoring for memory corruption. You have to parse dmesg, but that's a ring buffer, so the error might disappear after some time. In general I do not consider dmesg to be a great API to query RAM status.

In several companies I've seen such errors remain undetected and cause issues for way too long. So I think it makes sense to provide a monitoring API, so that we can safely detect and act upon them.

This adds a /proc/meminfo entry which can be easily used by scripts.

Link: https://lkml.kernel.org/r/20230321103430.7130-1-tomas.mudrunka@gmail.com
Signed-off-by: Tomas Mudrunka <tomas.mudrunka@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/memtest.c')
-rw-r--r-- mm/memtest.c 6
1 files changed, 6 insertions, 0 deletions
diff --git a/mm/memtest.c b/mm/memtest.c
index f53ace709ccd..57149dfee438 100644
--- a/mm/memtest.c
+++ b/mm/memtest.c
@@ -4,6 +4,9 @@
#include <linux/init.h>
#include <linux/memblock.h>
+bool early_memtest_done;
+phys_addr_t early_memtest_bad_size;
+
static u64 patterns[] __initdata = {
/* The first entry has to be 0 to leave memtest with zeroed memory */
0,
@@ -30,6 +33,7 @@ static void __init reserve_bad_mem(u64 pattern, phys_addr_t start_bad, phys_addr
pr_info(" %016llx bad mem addr %pa - %pa reserved\n",
cpu_to_be64(pattern), &start_bad, &end_bad);
memblock_reserve(start_bad, end_bad - start_bad);
+ early_memtest_bad_size += (end_bad - start_bad);
}
static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size)
@@ -61,6 +65,8 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size
}
if (start_bad)
reserve_bad_mem(pattern, start_bad, last_bad + incr);
+
+ early_memtest_done = true;
}
static void __init do_one_pass(u64 pattern, phys_addr_t start, phys_addr_t end)