diff options
author | SeongJae Park <sj@kernel.org> | 2023-01-03 18:07:53 +0000 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2023-01-18 17:12:56 -0800 |
commit | baa489fabd01596d5426d6e112b34ba5fb59ab82 (patch) | |
tree | 4647b2962ac3eee6a6bec6d9ed75a03df169ef68 /tools/testing/selftests/vm/khugepaged.c | |
parent | 799fb82aa132fa3a3886b7872997a5a84e820062 (diff) | |
download | linux-stable-baa489fabd01596d5426d6e112b34ba5fb59ab82.tar.gz linux-stable-baa489fabd01596d5426d6e112b34ba5fb59ab82.tar.bz2 linux-stable-baa489fabd01596d5426d6e112b34ba5fb59ab82.zip |
selftests/vm: rename selftests/vm to selftests/mm
Rename selftets/vm to selftests/mm for being more consistent with the
code, documentation, and tools directories, and won't be confused with
virtual machines.
[sj@kernel.org: convert missing vm->mm changes]
Link: https://lkml.kernel.org/r/20230107230643.252273-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230103180754.129637-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'tools/testing/selftests/vm/khugepaged.c')
-rw-r--r-- | tools/testing/selftests/vm/khugepaged.c | 1558 |
1 files changed, 0 insertions, 1558 deletions
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c deleted file mode 100644 index 64126c8cd561..000000000000 --- a/tools/testing/selftests/vm/khugepaged.c +++ /dev/null @@ -1,1558 +0,0 @@ -#define _GNU_SOURCE -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <limits.h> -#include <dirent.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> - -#include <sys/mman.h> -#include <sys/wait.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/sysmacros.h> -#include <sys/vfs.h> - -#include "linux/magic.h" - -#include "vm_util.h" - -#ifndef MADV_PAGEOUT -#define MADV_PAGEOUT 21 -#endif -#ifndef MADV_POPULATE_READ -#define MADV_POPULATE_READ 22 -#endif -#ifndef MADV_COLLAPSE -#define MADV_COLLAPSE 25 -#endif - -#define BASE_ADDR ((void *)(1UL << 30)) -static unsigned long hpage_pmd_size; -static unsigned long page_size; -static int hpage_pmd_nr; - -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" -#define PID_SMAPS "/proc/self/smaps" -#define TEST_FILE "collapse_test_file" - -#define MAX_LINE_LENGTH 500 - -enum vma_type { - VMA_ANON, - VMA_FILE, - VMA_SHMEM, -}; - -struct mem_ops { - void *(*setup_area)(int nr_hpages); - void (*cleanup_area)(void *p, unsigned long size); - void (*fault)(void *p, unsigned long start, unsigned long end); - bool (*check_huge)(void *addr, int nr_hpages); - const char *name; -}; - -static struct mem_ops *file_ops; -static struct mem_ops *anon_ops; -static struct mem_ops *shmem_ops; - -struct collapse_context { - void (*collapse)(const char *msg, char *p, int nr_hpages, - struct mem_ops *ops, bool expect); - bool enforce_pte_scan_limits; - const char *name; -}; - -static struct collapse_context *khugepaged_context; -static struct collapse_context *madvise_context; - -struct file_info { - const char *dir; - char path[PATH_MAX]; - enum vma_type type; - int fd; - char dev_queue_read_ahead_path[PATH_MAX]; -}; - -static struct file_info finfo; - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool use_zero_page; - struct khugepaged_settings khugepaged; - unsigned long read_ahead_kb; -}; - -static struct settings saved_settings; -static bool skip_settings_restore; - -static int exit_status; - -static void success(const char *msg) -{ - printf(" \e[32m%s\e[0m\n", msg); -} - -static void fail(const char *msg) -{ - printf(" \e[31m%s\e[0m\n", msg); - exit_status++; -} - -static void skip(const char *msg) -{ - printf(" \e[33m%s\e[0m\n", msg); -} - -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) { - printf("open(%s)\n", path); - exit(EXIT_FAILURE); - return 0; - } - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) { - printf("write(%s)\n", buf); - exit(EXIT_FAILURE); - return 0; - } - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) -{ - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long _read_num(const char *path) -{ - char buf[21]; - - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return _read_num(path); -} - -static void _write_num(const char *path, unsigned long num) -{ - char buf[21]; - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - _write_num(path, num); -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); - - if (file_ops && finfo.type == VMA_FILE) - _write_num(finfo.dev_queue_read_ahead_path, - settings->read_ahead_kb); -} - -#define MAX_SETTINGS_DEPTH 4 -static struct settings settings_stack[MAX_SETTINGS_DEPTH]; -static int settings_index; - -static struct settings *current_settings(void) -{ - if (!settings_index) { - printf("Fail: No settings set"); - exit(EXIT_FAILURE); - } - return settings_stack + settings_index - 1; -} - -static void push_settings(struct settings *settings) -{ - if (settings_index >= MAX_SETTINGS_DEPTH) { - printf("Fail: Settings stack exceeded"); - exit(EXIT_FAILURE); - } - settings_stack[settings_index++] = *settings; - write_settings(current_settings()); -} - -static void pop_settings(void) -{ - if (settings_index <= 0) { - printf("Fail: Settings stack empty"); - exit(EXIT_FAILURE); - } - --settings_index; - write_settings(current_settings()); -} - -static void restore_settings(int sig) -{ - if (skip_settings_restore) - goto out; - - printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); - success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); -} - -static void save_settings(void) -{ - printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; - if (file_ops && finfo.type == VMA_FILE) - saved_settings.read_ahead_kb = - _read_num(finfo.dev_queue_read_ahead_path); - - success("OK"); - - signal(SIGTERM, restore_settings); - signal(SIGINT, restore_settings); - signal(SIGHUP, restore_settings); - signal(SIGQUIT, restore_settings); -} - -static void get_finfo(const char *dir) -{ - struct stat path_stat; - struct statfs fs; - char buf[1 << 10]; - char path[PATH_MAX]; - char *str, *end; - - finfo.dir = dir; - stat(finfo.dir, &path_stat); - if (!S_ISDIR(path_stat.st_mode)) { - printf("%s: Not a directory (%s)\n", __func__, finfo.dir); - exit(EXIT_FAILURE); - } - if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE, - finfo.dir) >= sizeof(finfo.path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - if (statfs(finfo.dir, &fs)) { - perror("statfs()"); - exit(EXIT_FAILURE); - } - finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE; - if (finfo.type == VMA_SHMEM) - return; - - /* Find owning device's queue/read_ahead_kb control */ - if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent", - major(path_stat.st_dev), minor(path_stat.st_dev)) - >= sizeof(path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - if (strstr(buf, "DEVTYPE=disk")) { - /* Found it */ - if (snprintf(finfo.dev_queue_read_ahead_path, - sizeof(finfo.dev_queue_read_ahead_path), - "/sys/dev/block/%d:%d/queue/read_ahead_kb", - major(path_stat.st_dev), minor(path_stat.st_dev)) - >= sizeof(finfo.dev_queue_read_ahead_path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return; - } - if (!strstr(buf, "DEVTYPE=partition")) { - printf("%s: Unknown device type: %s\n", __func__, path); - exit(EXIT_FAILURE); - } - /* - * Partition of block device - need to find actual device. - * Using naming convention that devnameN is partition of - * device devname. - */ - str = strstr(buf, "DEVNAME="); - if (!str) { - printf("%s: Could not read: %s", __func__, path); - exit(EXIT_FAILURE); - } - str += 8; - end = str; - while (*end) { - if (isdigit(*end)) { - *end = '\0'; - if (snprintf(finfo.dev_queue_read_ahead_path, - sizeof(finfo.dev_queue_read_ahead_path), - "/sys/block/%s/queue/read_ahead_kb", - str) >= sizeof(finfo.dev_queue_read_ahead_path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return; - } - ++end; - } - printf("%s: Could not read: %s\n", __func__, path); - exit(EXIT_FAILURE); -} - -static bool check_swap(void *addr, unsigned long size) -{ - bool swap = false; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(PID_SMAPS, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) - goto err_out; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", - size >> 10); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - /* - * Fetch the Swap: in the same block and check whether it got - * the expected number of hugeepages next. - */ - if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer))) - goto err_out; - - if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) - goto err_out; - - swap = true; -err_out: - fclose(fp); - return swap; -} - -static void *alloc_mapping(int nr) -{ - void *p; - - p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (p != BASE_ADDR) { - printf("Failed to allocate VMA at %p\n", BASE_ADDR); - exit(EXIT_FAILURE); - } - - return p; -} - -static void fill_memory(int *p, unsigned long start, unsigned long end) -{ - int i; - - for (i = start / page_size; i < end / page_size; i++) - p[i * page_size / sizeof(*p)] = i + 0xdead0000; -} - -/* - * MADV_COLLAPSE is a best-effort request and may fail if an internal - * resource is temporarily unavailable, in which case it will set errno to - * EAGAIN. In such a case, immediately reattempt the operation one more - * time. - */ -static int madvise_collapse_retry(void *p, unsigned long size) -{ - bool retry = true; - int ret; - -retry: - ret = madvise(p, size, MADV_COLLAPSE); - if (ret && errno == EAGAIN && retry) { - retry = false; - goto retry; - } - return ret; -} - -/* - * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with - * validate_memory()'able contents. - */ -static void *alloc_hpage(struct mem_ops *ops) -{ - void *p = ops->setup_area(1); - - ops->fault(p, 0, hpage_pmd_size); - - /* - * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE. - * The latter is ineligible for collapse by MADV_COLLAPSE - * while the former might cause MADV_COLLAPSE to race with - * khugepaged on low-load system (like a test machine), which - * would cause MADV_COLLAPSE to fail with EAGAIN. - */ - printf("Allocate huge page..."); - if (madvise_collapse_retry(p, hpage_pmd_size)) { - perror("madvise(MADV_COLLAPSE)"); - exit(EXIT_FAILURE); - } - if (!ops->check_huge(p, 1)) { - perror("madvise(MADV_COLLAPSE)"); - exit(EXIT_FAILURE); - } - if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) { - perror("madvise(MADV_HUGEPAGE)"); - exit(EXIT_FAILURE); - } - success("OK"); - return p; -} - -static void validate_memory(int *p, unsigned long start, unsigned long end) -{ - int i; - - for (i = start / page_size; i < end / page_size; i++) { - if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) { - printf("Page %d is corrupted: %#x\n", - i, p[i * page_size / sizeof(*p)]); - exit(EXIT_FAILURE); - } - } -} - -static void *anon_setup_area(int nr_hpages) -{ - return alloc_mapping(nr_hpages); -} - -static void anon_cleanup_area(void *p, unsigned long size) -{ - munmap(p, size); -} - -static void anon_fault(void *p, unsigned long start, unsigned long end) -{ - fill_memory(p, start, end); -} - -static bool anon_check_huge(void *addr, int nr_hpages) -{ - return check_huge_anon(addr, nr_hpages, hpage_pmd_size); -} - -static void *file_setup_area(int nr_hpages) -{ - int fd; - void *p; - unsigned long size; - - unlink(finfo.path); /* Cleanup from previous failed tests */ - printf("Creating %s for collapse%s...", finfo.path, - finfo.type == VMA_SHMEM ? " (tmpfs)" : ""); - fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL, - 777); - if (fd < 0) { - perror("open()"); - exit(EXIT_FAILURE); - } - - size = nr_hpages * hpage_pmd_size; - p = alloc_mapping(nr_hpages); - fill_memory(p, 0, size); - write(fd, p, size); - close(fd); - munmap(p, size); - success("OK"); - - printf("Opening %s read only for collapse...", finfo.path); - finfo.fd = open(finfo.path, O_RDONLY, 777); - if (finfo.fd < 0) { - perror("open()"); - exit(EXIT_FAILURE); - } - p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, - MAP_PRIVATE, finfo.fd, 0); - if (p == MAP_FAILED || p != BASE_ADDR) { - perror("mmap()"); - exit(EXIT_FAILURE); - } - - /* Drop page cache */ - write_file("/proc/sys/vm/drop_caches", "3", 2); - success("OK"); - return p; -} - -static void file_cleanup_area(void *p, unsigned long size) -{ - munmap(p, size); - close(finfo.fd); - unlink(finfo.path); -} - -static void file_fault(void *p, unsigned long start, unsigned long end) -{ - if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) { - perror("madvise(MADV_POPULATE_READ"); - exit(EXIT_FAILURE); - } -} - -static bool file_check_huge(void *addr, int nr_hpages) -{ - switch (finfo.type) { - case VMA_FILE: - return check_huge_file(addr, nr_hpages, hpage_pmd_size); - case VMA_SHMEM: - return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); - default: - exit(EXIT_FAILURE); - return false; - } -} - -static void *shmem_setup_area(int nr_hpages) -{ - void *p; - unsigned long size = nr_hpages * hpage_pmd_size; - - finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0); - if (finfo.fd < 0) { - perror("memfd_create()"); - exit(EXIT_FAILURE); - } - if (ftruncate(finfo.fd, size)) { - perror("ftruncate()"); - exit(EXIT_FAILURE); - } - p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd, - 0); - if (p != BASE_ADDR) { - perror("mmap()"); - exit(EXIT_FAILURE); - } - return p; -} - -static void shmem_cleanup_area(void *p, unsigned long size) -{ - munmap(p, size); - close(finfo.fd); -} - -static bool shmem_check_huge(void *addr, int nr_hpages) -{ - return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); -} - -static struct mem_ops __anon_ops = { - .setup_area = &anon_setup_area, - .cleanup_area = &anon_cleanup_area, - .fault = &anon_fault, - .check_huge = &anon_check_huge, - .name = "anon", -}; - -static struct mem_ops __file_ops = { - .setup_area = &file_setup_area, - .cleanup_area = &file_cleanup_area, - .fault = &file_fault, - .check_huge = &file_check_huge, - .name = "file", -}; - -static struct mem_ops __shmem_ops = { - .setup_area = &shmem_setup_area, - .cleanup_area = &shmem_cleanup_area, - .fault = &anon_fault, - .check_huge = &shmem_check_huge, - .name = "shmem", -}; - -static void __madvise_collapse(const char *msg, char *p, int nr_hpages, - struct mem_ops *ops, bool expect) -{ - int ret; - struct settings settings = *current_settings(); - - printf("%s...", msg); - - /* - * Prevent khugepaged interference and tests that MADV_COLLAPSE - * ignores /sys/kernel/mm/transparent_hugepage/enabled - */ - settings.thp_enabled = THP_NEVER; - settings.shmem_enabled = SHMEM_NEVER; - push_settings(&settings); - - /* Clear VM_NOHUGEPAGE */ - madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); - ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size); - if (((bool)ret) == expect) - fail("Fail: Bad return value"); - else if (!ops->check_huge(p, expect ? nr_hpages : 0)) - fail("Fail: check_huge()"); - else - success("OK"); - - pop_settings(); -} - -static void madvise_collapse(const char *msg, char *p, int nr_hpages, - struct mem_ops *ops, bool expect) -{ - /* Sanity check */ - if (!ops->check_huge(p, 0)) { - printf("Unexpected huge page\n"); - exit(EXIT_FAILURE); - } - __madvise_collapse(msg, p, nr_hpages, ops, expect); -} - -#define TICK 500000 -static bool wait_for_scan(const char *msg, char *p, int nr_hpages, - struct mem_ops *ops) -{ - int full_scans; - int timeout = 6; /* 3 seconds */ - - /* Sanity check */ - if (!ops->check_huge(p, 0)) { - printf("Unexpected huge page\n"); - exit(EXIT_FAILURE); - } - - madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); - - /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; - - printf("%s...", msg); - while (timeout--) { - if (ops->check_huge(p, nr_hpages)) - break; - if (read_num("khugepaged/full_scans") >= full_scans) - break; - printf("."); - usleep(TICK); - } - - madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); - - return timeout == -1; -} - -static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, - struct mem_ops *ops, bool expect) -{ - if (wait_for_scan(msg, p, nr_hpages, ops)) { - if (expect) - fail("Timeout"); - else - success("OK"); - return; - } - - /* - * For file and shmem memory, khugepaged only retracts pte entries after - * putting the new hugepage in the page cache. The hugepage must be - * subsequently refaulted to install the pmd mapping for the mm. - */ - if (ops != &__anon_ops) - ops->fault(p, 0, nr_hpages * hpage_pmd_size); - - if (ops->check_huge(p, expect ? nr_hpages : 0)) - success("OK"); - else - fail("Fail"); -} - -static struct collapse_context __khugepaged_context = { - .collapse = &khugepaged_collapse, - .enforce_pte_scan_limits = true, - .name = "khugepaged", -}; - -static struct collapse_context __madvise_context = { - .collapse = &madvise_collapse, - .enforce_pte_scan_limits = false, - .name = "madvise", -}; - -static bool is_tmpfs(struct mem_ops *ops) -{ - return ops == &__file_ops && finfo.type == VMA_SHMEM; -} - -static void alloc_at_fault(void) -{ - struct settings settings = *current_settings(); - char *p; - - settings.thp_enabled = THP_ALWAYS; - push_settings(&settings); - - p = alloc_mapping(1); - *p = 1; - printf("Allocate huge page on fault..."); - if (check_huge_anon(p, 1, hpage_pmd_size)) - success("OK"); - else - fail("Fail"); - - pop_settings(); - - madvise(p, page_size, MADV_DONTNEED); - printf("Split huge PMD on MADV_DONTNEED..."); - if (check_huge_anon(p, 0, hpage_pmd_size)) - success("OK"); - else - fail("Fail"); - munmap(p, hpage_pmd_size); -} - -static void collapse_full(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - int nr_hpages = 4; - unsigned long size = nr_hpages * hpage_pmd_size; - - p = ops->setup_area(nr_hpages); - ops->fault(p, 0, size); - c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, - ops, true); - validate_memory(p, 0, size); - ops->cleanup_area(p, size); -} - -static void collapse_empty(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - - p = ops->setup_area(1); - c->collapse("Do not collapse empty PTE table", p, 1, ops, false); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - - p = ops->setup_area(1); - ops->fault(p, 0, page_size); - c->collapse("Collapse PTE table with single PTE entry present", p, - 1, ops, true); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) -{ - int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = *current_settings(); - void *p; - - settings.khugepaged.max_ptes_none = max_ptes_none; - push_settings(&settings); - - p = ops->setup_area(1); - - if (is_tmpfs(ops)) { - /* shmem pages always in the page cache */ - printf("tmpfs..."); - skip("Skip"); - goto skip; - } - - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, - ops, !c->enforce_pte_scan_limits); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - - if (c->enforce_pte_scan_limits) { - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops, - true); - validate_memory(p, 0, - (hpage_pmd_nr - max_ptes_none) * page_size); - } -skip: - ops->cleanup_area(p, hpage_pmd_size); - pop_settings(); -} - -static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - - p = ops->setup_area(1); - ops->fault(p, 0, hpage_pmd_size); - - printf("Swapout one page..."); - if (madvise(p, page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - c->collapse("Collapse with swapping in single PTE entry", p, 1, ops, - true); - validate_memory(p, 0, hpage_pmd_size); -out: - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) -{ - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); - void *p; - - p = ops->setup_area(1); - ops->fault(p, 0, hpage_pmd_size); - - printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); - if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, (max_ptes_swap + 1) * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops, - !c->enforce_pte_scan_limits); - validate_memory(p, 0, hpage_pmd_size); - - if (c->enforce_pte_scan_limits) { - ops->fault(p, 0, hpage_pmd_size); - printf("Swapout %d of %d pages...", max_ptes_swap, - hpage_pmd_nr); - if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, max_ptes_swap * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } - - c->collapse("Collapse with max_ptes_swap pages swapped out", p, - 1, ops, true); - validate_memory(p, 0, hpage_pmd_size); - } -out: - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - - p = alloc_hpage(ops); - - if (is_tmpfs(ops)) { - /* MADV_DONTNEED won't evict tmpfs pages */ - printf("tmpfs..."); - skip("Skip"); - goto skip; - } - - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - printf("Split huge page leaving single PTE mapping compound page..."); - madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - c->collapse("Collapse PTE table with single PTE mapping compound page", - p, 1, ops, true); - validate_memory(p, 0, page_size); -skip: - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - - p = alloc_hpage(ops); - printf("Split huge page leaving single PTE page table full of compound pages..."); - madvise(p, page_size, MADV_NOHUGEPAGE); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - c->collapse("Collapse PTE table full of compound pages", p, 1, ops, - true); - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops) -{ - void *p; - int i; - - p = ops->setup_area(1); - for (i = 0; i < hpage_pmd_nr; i++) { - printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", - i + 1, hpage_pmd_nr); - - madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); - ops->fault(BASE_ADDR, 0, hpage_pmd_size); - if (!ops->check_huge(BASE_ADDR, 1)) { - printf("Failed to allocate huge page\n"); - exit(EXIT_FAILURE); - } - madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE); - - p = mremap(BASE_ADDR - i * page_size, - i * page_size + hpage_pmd_size, - (i + 1) * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, - BASE_ADDR + 2 * hpage_pmd_size); - if (p == MAP_FAILED) { - perror("mremap+unmap"); - exit(EXIT_FAILURE); - } - - p = mremap(BASE_ADDR + 2 * hpage_pmd_size, - (i + 1) * page_size, - (i + 1) * page_size + hpage_pmd_size, - MREMAP_MAYMOVE | MREMAP_FIXED, - BASE_ADDR - (i + 1) * page_size); - if (p == MAP_FAILED) { - perror("mremap+alloc"); - exit(EXIT_FAILURE); - } - } - - ops->cleanup_area(BASE_ADDR, hpage_pmd_size); - ops->fault(p, 0, hpage_pmd_size); - if (!ops->check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - - c->collapse("Collapse PTE table full of different compound pages", p, 1, - ops, true); - - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_fork(struct collapse_context *c, struct mem_ops *ops) -{ - int wstatus; - void *p; - - p = ops->setup_area(1); - - printf("Allocate small page..."); - ops->fault(p, 0, page_size); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - printf("Share small page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - ops->fault(p, page_size, 2 * page_size); - c->collapse("Collapse PTE table with single page shared with parent process", - p, 1, ops, true); - - validate_memory(p, 0, page_size); - ops->cleanup_area(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has small page..."); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, page_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops) -{ - int wstatus; - void *p; - - p = alloc_hpage(ops); - printf("Share huge page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (ops->check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - - printf("Split huge page PMD in child process..."); - madvise(p, page_size, MADV_NOHUGEPAGE); - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - ops->fault(p, 0, page_size); - - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); - c->collapse("Collapse PTE table full of compound pages in child", - p, 1, ops, true); - write_num("khugepaged/max_ptes_shared", - current_settings()->khugepaged.max_ptes_shared); - - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has huge page..."); - if (ops->check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) -{ - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); - int wstatus; - void *p; - - p = alloc_hpage(ops); - printf("Share huge page over fork()..."); - if (!fork()) { - /* Do not touch settings on child exit */ - skip_settings_restore = true; - exit_status = 0; - - if (ops->check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - c->collapse("Maybe collapse with max_ptes_shared exceeded", p, - 1, ops, !c->enforce_pte_scan_limits); - - if (c->enforce_pte_scan_limits) { - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) * - page_size); - if (ops->check_huge(p, 0)) - success("OK"); - else - fail("Fail"); - - c->collapse("Collapse with max_ptes_shared PTEs shared", - p, 1, ops, true); - } - - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); - exit(exit_status); - } - - wait(&wstatus); - exit_status += WEXITSTATUS(wstatus); - - printf("Check if parent still has huge page..."); - if (ops->check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -static void madvise_collapse_existing_thps(struct collapse_context *c, - struct mem_ops *ops) -{ - void *p; - - p = ops->setup_area(1); - ops->fault(p, 0, hpage_pmd_size); - c->collapse("Collapse fully populated PTE table...", p, 1, ops, true); - validate_memory(p, 0, hpage_pmd_size); - - /* c->collapse() will find a hugepage and complain - call directly. */ - __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true); - validate_memory(p, 0, hpage_pmd_size); - ops->cleanup_area(p, hpage_pmd_size); -} - -/* - * Test race with khugepaged where page tables have been retracted and - * pmd cleared. - */ -static void madvise_retracted_page_tables(struct collapse_context *c, - struct mem_ops *ops) -{ - void *p; - int nr_hpages = 1; - unsigned long size = nr_hpages * hpage_pmd_size; - - p = ops->setup_area(nr_hpages); - ops->fault(p, 0, size); - - /* Let khugepaged collapse and leave pmd cleared */ - if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages, - ops)) { - fail("Timeout"); - return; - } - success("OK"); - c->collapse("Install huge PMD from page cache", p, nr_hpages, ops, - true); - validate_memory(p, 0, size); - ops->cleanup_area(p, size); -} - -static void usage(void) -{ - fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n"); - fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n"); - fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n"); - fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n"); - fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); - fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); - fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); - fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); - fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); - exit(1); -} - -static void parse_test_type(int argc, const char **argv) -{ - char *buf; - const char *token; - - if (argc == 1) { - /* Backwards compatibility */ - khugepaged_context = &__khugepaged_context; - madvise_context = &__madvise_context; - anon_ops = &__anon_ops; - return; - } - - buf = strdup(argv[1]); - token = strsep(&buf, ":"); - - if (!strcmp(token, "all")) { - khugepaged_context = &__khugepaged_context; - madvise_context = &__madvise_context; - } else if (!strcmp(token, "khugepaged")) { - khugepaged_context = &__khugepaged_context; - } else if (!strcmp(token, "madvise")) { - madvise_context = &__madvise_context; - } else { - usage(); - } - - if (!buf) - usage(); - - if (!strcmp(buf, "all")) { - file_ops = &__file_ops; - anon_ops = &__anon_ops; - shmem_ops = &__shmem_ops; - } else if (!strcmp(buf, "anon")) { - anon_ops = &__anon_ops; - } else if (!strcmp(buf, "file")) { - file_ops = &__file_ops; - } else if (!strcmp(buf, "shmem")) { - shmem_ops = &__shmem_ops; - } else { - usage(); - } - - if (!file_ops) - return; - - if (argc != 3) - usage(); -} - -int main(int argc, const char **argv) -{ - struct settings default_settings = { - .thp_enabled = THP_MADVISE, - .thp_defrag = THP_DEFRAG_ALWAYS, - .shmem_enabled = SHMEM_ADVISE, - .use_zero_page = 0, - .khugepaged = { - .defrag = 1, - .alloc_sleep_millisecs = 10, - .scan_sleep_millisecs = 10, - }, - /* - * When testing file-backed memory, the collapse path - * looks at how many pages are found in the page cache, not - * what pages are mapped. Disable read ahead optimization so - * pages don't find their way into the page cache unless - * we mem_ops->fault() them in. - */ - .read_ahead_kb = 0, - }; - - parse_test_type(argc, argv); - - if (file_ops) - get_finfo(argv[2]); - - setbuf(stdout, NULL); - - page_size = getpagesize(); - hpage_pmd_size = read_pmd_pagesize(); - hpage_pmd_nr = hpage_pmd_size / page_size; - - default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; - default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; - default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; - default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; - - save_settings(); - push_settings(&default_settings); - - alloc_at_fault(); - -#define TEST(t, c, o) do { \ - if (c && o) { \ - printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \ - t(c, o); \ - } \ - } while (0) - - TEST(collapse_full, khugepaged_context, anon_ops); - TEST(collapse_full, khugepaged_context, file_ops); - TEST(collapse_full, khugepaged_context, shmem_ops); - TEST(collapse_full, madvise_context, anon_ops); - TEST(collapse_full, madvise_context, file_ops); - TEST(collapse_full, madvise_context, shmem_ops); - - TEST(collapse_empty, khugepaged_context, anon_ops); - TEST(collapse_empty, madvise_context, anon_ops); - - TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); - TEST(collapse_single_pte_entry, khugepaged_context, file_ops); - TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops); - TEST(collapse_single_pte_entry, madvise_context, anon_ops); - TEST(collapse_single_pte_entry, madvise_context, file_ops); - TEST(collapse_single_pte_entry, madvise_context, shmem_ops); - - TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); - TEST(collapse_max_ptes_none, khugepaged_context, file_ops); - TEST(collapse_max_ptes_none, madvise_context, anon_ops); - TEST(collapse_max_ptes_none, madvise_context, file_ops); - - TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops); - TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops); - TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops); - TEST(collapse_single_pte_entry_compound, madvise_context, file_ops); - - TEST(collapse_full_of_compound, khugepaged_context, anon_ops); - TEST(collapse_full_of_compound, khugepaged_context, file_ops); - TEST(collapse_full_of_compound, khugepaged_context, shmem_ops); - TEST(collapse_full_of_compound, madvise_context, anon_ops); - TEST(collapse_full_of_compound, madvise_context, file_ops); - TEST(collapse_full_of_compound, madvise_context, shmem_ops); - - TEST(collapse_compound_extreme, khugepaged_context, anon_ops); - TEST(collapse_compound_extreme, madvise_context, anon_ops); - - TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops); - TEST(collapse_swapin_single_pte, madvise_context, anon_ops); - - TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops); - TEST(collapse_max_ptes_swap, madvise_context, anon_ops); - - TEST(collapse_fork, khugepaged_context, anon_ops); - TEST(collapse_fork, madvise_context, anon_ops); - - TEST(collapse_fork_compound, khugepaged_context, anon_ops); - TEST(collapse_fork_compound, madvise_context, anon_ops); - - TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops); - TEST(collapse_max_ptes_shared, madvise_context, anon_ops); - - TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); - TEST(madvise_collapse_existing_thps, madvise_context, file_ops); - TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); - - TEST(madvise_retracted_page_tables, madvise_context, file_ops); - TEST(madvise_retracted_page_tables, madvise_context, shmem_ops); - - restore_settings(0); -} |