diff options
Diffstat (limited to 'tools')
72 files changed, 2888 insertions, 524 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4fa4bc4505f5..4252fc22f78f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz) + size_t obj_buf_sz, + const char *name) { + char tmp_name[64]; + /* param validation */ if (!obj_buf || obj_buf_sz <= 0) return NULL; - pr_debug("loading object from buffer\n"); + if (!name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + tmp_name[sizeof(tmp_name) - 1] = '\0'; + name = tmp_name; + } + pr_debug("loading object '%s' from buffer\n", + name); - return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz); + return __bpf_object__open(name, obj_buf, obj_buf_sz); } int bpf_object__unload(struct bpf_object *obj) @@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev) return next; } +const char * +bpf_object__get_name(struct bpf_object *obj) +{ + if (!obj) + return NULL; + return obj->path; +} + struct bpf_program * bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index ea8adc206b62..f16170c95ffd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -28,12 +28,14 @@ struct bpf_object; struct bpf_object *bpf_object__open(const char *path); struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz); + size_t obj_buf_sz, + const char *name); void bpf_object__close(struct bpf_object *object); /* Load/unload object into/from kernel */ int bpf_object__load(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj); +const char *bpf_object__get_name(struct bpf_object *obj); struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index 618c2bcd4eab..2cd3d4c99738 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -22,9 +22,14 @@ #include <string.h> #include <bfd.h> #include <dis-asm.h> +#include <regex.h> +#include <fcntl.h> #include <sys/klog.h> #include <sys/types.h> -#include <regex.h> +#include <sys/stat.h> + +#define CMD_ACTION_SIZE_BUFFER 10 +#define CMD_ACTION_READ_ALL 3 static void get_exec_path(char *tpath, size_t size) { @@ -87,20 +92,66 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes) bfd_close(bfdf); } -static char *get_klog_buff(int *klen) +static char *get_klog_buff(unsigned int *klen) { - int ret, len = klogctl(10, NULL, 0); - char *buff = malloc(len); + int ret, len; + char *buff; + + len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0); + buff = malloc(len); + if (!buff) + return NULL; + + ret = klogctl(CMD_ACTION_READ_ALL, buff, len); + if (ret < 0) { + free(buff); + return NULL; + } - assert(buff && klen); - ret = klogctl(3, buff, len); - assert(ret >= 0); *klen = ret; + return buff; +} +static char *get_flog_buff(const char *file, unsigned int *klen) +{ + int fd, ret, len; + struct stat fi; + char *buff; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + ret = fstat(fd, &fi); + if (ret < 0 || !S_ISREG(fi.st_mode)) + goto out; + + len = fi.st_size + 1; + buff = malloc(len); + if (!buff) + goto out; + + memset(buff, 0, len); + ret = read(fd, buff, len - 1); + if (ret <= 0) + goto out_free; + + close(fd); + *klen = ret; return buff; +out_free: + free(buff); +out: + close(fd); + return NULL; +} + +static char *get_log_buff(const char *file, unsigned int *klen) +{ + return file ? get_flog_buff(file, klen) : get_klog_buff(klen); } -static void put_klog_buff(char *buff) +static void put_log_buff(char *buff) { free(buff); } @@ -138,8 +189,10 @@ static int get_last_jit_image(char *haystack, size_t hlen, ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", &flen, &proglen, &pass, &base); - if (ret != 4) + if (ret != 4) { + regfree(®ex); return 0; + } tmp = ptr = haystack + off; while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) { @@ -169,31 +222,49 @@ static int get_last_jit_image(char *haystack, size_t hlen, return ulen; } +static void usage(void) +{ + printf("Usage: bpf_jit_disasm [...]\n"); + printf(" -o Also display related opcodes (default: off).\n"); + printf(" -f <file> Read last image dump from file or stdin (default: klog).\n"); + printf(" -h Display this help.\n"); +} + int main(int argc, char **argv) { - int len, klen, opcodes = 0; - char *kbuff; + unsigned int len, klen, opt, opcodes = 0; static uint8_t image[32768]; + char *kbuff, *file = NULL; - if (argc > 1) { - if (!strncmp("-o", argv[argc - 1], 2)) { + while ((opt = getopt(argc, argv, "of:")) != -1) { + switch (opt) { + case 'o': opcodes = 1; - } else { - printf("usage: bpf_jit_disasm [-o: show opcodes]\n"); - exit(0); + break; + case 'f': + file = optarg; + break; + default: + usage(); + return -1; } } bfd_init(); memset(image, 0, sizeof(image)); - kbuff = get_klog_buff(&klen); + kbuff = get_log_buff(file, &klen); + if (!kbuff) { + fprintf(stderr, "Could not retrieve log buffer!\n"); + return -1; + } len = get_last_jit_image(kbuff, klen, image, sizeof(image)); if (len > 0) get_asm_insns(image, len, opcodes); + else + fprintf(stderr, "No JIT image found!\n"); - put_klog_buff(kbuff); - + put_log_buff(kbuff); return 0; } diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 347a27322ed8..2e9ce77b5e14 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different. --intr-regs:: Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option -is off by default. +is off by default. It is possible to select the registers to sample using their +symbolic names, e.g. on x86, ax, si. To list the available registers use +--intr-regs=\?. To name registers, pass a comma separated list such as +--intr-regs=ax,bx. The list of register is architecture dependent. + --running-time:: Record running and enabled time for read events (:S) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 614b2c7b0293..dc3ec783b7bd 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, flags. + srcline, period, iregs, flags. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c index 0d0897f57a10..f8dfa89696f4 100644 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ b/tools/perf/arch/sh/util/dwarf-regs.c @@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = { /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; + return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; } diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c index 92eda412fed3..b704fdb9237a 100644 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ b/tools/perf/arch/sparc/util/dwarf-regs.c @@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = { */ const char *get_arch_regstr(unsigned int n) { - return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; + return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; } diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 2c55e1b336c5..ff63649fa9ac 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += tsc.o libperf-y += pmu.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index be22dd463232..a08de0a35b83 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = { /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; + return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; } diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000000..c5db14f36cc7 --- /dev/null +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -0,0 +1,28 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(AX, PERF_REG_X86_AX), + SMPL_REG(BX, PERF_REG_X86_BX), + SMPL_REG(CX, PERF_REG_X86_CX), + SMPL_REG(DX, PERF_REG_X86_DX), + SMPL_REG(SI, PERF_REG_X86_SI), + SMPL_REG(DI, PERF_REG_X86_DI), + SMPL_REG(BP, PERF_REG_X86_BP), + SMPL_REG(SP, PERF_REG_X86_SP), + SMPL_REG(IP, PERF_REG_X86_IP), + SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), + SMPL_REG(CS, PERF_REG_X86_CS), + SMPL_REG(SS, PERF_REG_X86_SS), +#ifdef HAVE_ARCH_X86_64_SUPPORT + SMPL_REG(R8, PERF_REG_X86_R8), + SMPL_REG(R9, PERF_REG_X86_R9), + SMPL_REG(R10, PERF_REG_X86_R10), + SMPL_REG(R11, PERF_REG_X86_R11), + SMPL_REG(R12, PERF_REG_X86_R12), + SMPL_REG(R13, PERF_REG_X86_R13), + SMPL_REG(R14, PERF_REG_X86_R14), + SMPL_REG(R15, PERF_REG_X86_R15), +#endif + SMPL_REG_END +}; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a660022f2c92..142eeb341b29 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -27,8 +27,10 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/data.h" +#include "util/perf_regs.h" #include "util/auxtrace.h" #include "util/parse-branch-options.h" +#include "util/parse-regs-options.h" #include <unistd.h> #include <sched.h> @@ -279,7 +281,7 @@ static int record__open(struct record *rec) evlist__for_each(evlist, pos) { try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { + if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose) ui__warning("%s\n", msg); @@ -1080,8 +1082,9 @@ struct option __record_options[] = { "sample transaction flags (special events only)"), OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, "use per-thread mmaps"), - OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, - "Sample machine registers on interrupt"), + OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", + "sample selected machine registers on interrupt," + " use -I ? to list register names", parse_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4430340292c0..284a76e04628 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -6,6 +6,7 @@ #include "util/exec_cmd.h" #include "util/header.h" #include "util/parse-options.h" +#include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -46,6 +47,7 @@ enum perf_output_field { PERF_OUTPUT_SYMOFFSET = 1U << 11, PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_PERIOD = 1U << 13, + PERF_OUTPUT_IREGS = 1U << 14, }; struct output_option { @@ -66,6 +68,7 @@ struct output_option { {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, + {.str = "iregs", .field = PERF_OUTPUT_IREGS}, }; /* default set to maintain compatibility with current format */ @@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_PERIOD)) return -EINVAL; + if (PRINT_FIELD(IREGS) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", + PERF_OUTPUT_IREGS)) + return -EINVAL; + return 0; } @@ -352,6 +360,24 @@ out: return 0; } +static void print_sample_iregs(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr) +{ + struct regs_dump *regs = &sample->intr_regs; + uint64_t mask = attr->sample_regs_intr; + unsigned i = 0, r; + + if (!regs) + return; + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + } +} + static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) @@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample, PERF_MAX_STACK_DEPTH); } + if (PRINT_FIELD(IREGS)) + print_sample_iregs(event, sample, thread, attr); + printf("\n"); } @@ -739,8 +768,8 @@ static int process_exit_event(struct perf_tool *tool, if (!evsel->attr.sample_id_all) { sample->cpu = 0; sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); @@ -1643,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,flags", parse_output_fields), + "addr,symoff,period,iregs,flags", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cccb4cf575d3..90129accffbe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -54,7 +54,6 @@ struct record_opts { bool sample_time_set; bool callgraph_set; bool period; - bool sample_intr_regs; bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; @@ -64,6 +63,7 @@ struct record_opts { unsigned int auxtrace_mmap_pages; unsigned int user_freq; u64 branch_stack; + u64 sample_intr_regs; u64 default_interval; u64 user_interval; size_t auxtrace_snapshot_size; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index a337356fd979..52d55971f66f 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; - obj = bpf_object__open_buffer(obj_buf, obj_buf_sz); + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); if (!obj) return -1; bpf_object__close(obj); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c90731b..5b83f56a3b6f 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; + struct cpu_map *cpus; + struct thread_map *threads; attr.sample_freq = 500; @@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } perf_evlist__add(evlist, evsel); - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(getpid()); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(getpid()); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + if (perf_evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -107,6 +114,9 @@ next_event: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef83bc..add16385f13e 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -43,6 +43,8 @@ int test__task_exit(void) }; const char *argv[] = { "true", NULL }; char sbuf[STRERR_BUFSIZE]; + struct cpu_map *cpus; + struct thread_map *threads; signal(SIGCHLD, sig_handler); @@ -58,14 +60,19 @@ int test__task_exit(void) * perf_evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(-1); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(-1); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + err = perf_evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { @@ -114,6 +121,9 @@ retry: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cf86f2d3a5e7..c04c60d4863c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1968,7 +1968,8 @@ skip_annotation: &options[nr_options], dso); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection->map); + browser->selection ? + browser->selection->map : NULL); /* perf script support */ if (browser->he_selection) { @@ -1976,6 +1977,15 @@ skip_annotation: &actions[nr_options], &options[nr_options], thread, NULL); + /* + * Note that browser->selection != NULL + * when browser->he_selection is not NULL, + * so we don't need to check browser->selection + * before fetching browser->selection->sym like what + * we do before fetching browser->selection->map. + * + * See hist_browser__show_entry. + */ nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e912856cc4e5..349bc96ca1fe 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -75,6 +75,7 @@ libperf-y += record.o libperf-y += srcline.o libperf-y += data.o libperf-$(CONFIG_X86) += tsc.o +libperf-$(CONFIG_AUXTRACE) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o @@ -82,6 +83,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += parse-regs-options.o libperf-$(CONFIG_LIBELF) += symbol-elf.o libperf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8d00039d6a20..c8fc8a258f42 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -124,6 +124,33 @@ void perf_evlist__delete(struct perf_evlist *evlist) free(evlist); } +static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (!evsel->own_cpus || evlist->has_user_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); + } + + thread_map__put(evsel->threads); + evsel->threads = thread_map__get(evlist->threads); +} + +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evlist__propagate_maps(evlist, evsel); +} + void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { entry->evlist = evlist; @@ -133,18 +160,19 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) if (!evlist->nr_entries++) perf_evlist__set_id_pos(evlist); + + __perf_evlist__propagate_maps(evlist, entry); } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries) + struct list_head *list) { - bool set_id_pos = !evlist->nr_entries; + struct perf_evsel *evsel, *temp; - list_splice_tail(list, &evlist->entries); - evlist->nr_entries += nr_entries; - if (set_id_pos) - perf_evlist__set_id_pos(evlist); + __evlist__for_each_safe(list, temp, evsel) { + list_del_init(&evsel->node); + perf_evlist__add(evlist, evsel); + } } void __perf_evlist__set_leader(struct list_head *list) @@ -210,7 +238,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, list_add_tail(&evsel->node, &head); } - perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + perf_evlist__splice_list_tail(evlist, &head); return 0; @@ -1103,71 +1131,56 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. - */ - if (evsel->cpus && has_user_cpus) - cpu_map__put(evsel->cpus); - - if (!evsel->cpus || has_user_cpus) - evsel->cpus = cpu_map__get(evlist->cpus); - - evsel->threads = thread_map__get(evlist->threads); - - if ((evlist->cpus && !evsel->cpus) || - (evlist->threads && !evsel->threads)) - return -ENOMEM; - } - - return 0; -} - int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { - evlist->threads = thread_map__new_str(target->pid, target->tid, - target->uid); + struct cpu_map *cpus; + struct thread_map *threads; - if (evlist->threads == NULL) + threads = thread_map__new_str(target->pid, target->tid, target->uid); + + if (!threads) return -1; if (target__uses_dummy_map(target)) - evlist->cpus = cpu_map__dummy_new(); + cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(target->cpu_list); + cpus = cpu_map__new(target->cpu_list); - if (evlist->cpus == NULL) + if (!cpus) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, !!target->cpu_list); + evlist->has_user_cpus = !!target->cpu_list; + + perf_evlist__set_maps(evlist, cpus, threads); + + return 0; out_delete_threads: - thread_map__put(evlist->threads); - evlist->threads = NULL; + thread_map__put(threads); return -1; } -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) { - if (evlist->cpus) + /* + * Allow for the possibility that one or another of the maps isn't being + * changed i.e. don't put it. Note we are assuming the maps that are + * being applied are brand new and evlist is taking ownership of the + * original reference count of 1. If that is not the case it is up to + * the caller to increase the reference count. + */ + if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); + evlist->cpus = cpus; + } - evlist->cpus = cpus; - - if (evlist->threads) + if (threads != evlist->threads) { thread_map__put(evlist->threads); + evlist->threads = threads; + } - evlist->threads = threads; - - return perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist); } int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) @@ -1181,6 +1194,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e if (evsel->filter == NULL) continue; + /* + * filters only work for tracepoint event, which doesn't have cpu limit. + * So evlist and evsel should always be same. + */ err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); if (err) { *err_evsel = evsel; @@ -1383,6 +1400,8 @@ void perf_evlist__close(struct perf_evlist *evlist) static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) { + struct cpu_map *cpus; + struct thread_map *threads; int err = -ENOMEM; /* @@ -1394,20 +1413,19 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - evlist->cpus = cpu_map__new(NULL); - if (evlist->cpus == NULL) + cpus = cpu_map__new(NULL); + if (!cpus) goto out; - evlist->threads = thread_map__new_dummy(); - if (evlist->threads == NULL) - goto out_free_cpus; + threads = thread_map__new_dummy(); + if (!threads) + goto out_put; - err = 0; + perf_evlist__set_maps(evlist, cpus, threads); out: return err; -out_free_cpus: - cpu_map__put(evlist->cpus); - evlist->cpus = NULL; +out_put: + cpu_map__put(cpus); goto out; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b39a6198f4ac..115d8b53c601 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -42,6 +42,7 @@ struct perf_evlist { int nr_mmaps; bool overwrite; bool enabled; + bool has_user_cpus; size_t mmap_len; int id_pos; int is_pos; @@ -155,9 +156,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads); +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); @@ -179,8 +179,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries); + struct list_head *list); static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bac25f41a751..5410483d5219 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -787,7 +787,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) perf_evsel__config_callgraph(evsel, opts, &callchain_param); if (opts->sample_intr_regs) { - attr->sample_regs_intr = PERF_REGS_MASK; + attr->sample_regs_intr = opts->sample_intr_regs; perf_evsel__set_sample_bit(evsel, REGS_INTR); } @@ -1033,6 +1033,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 298e6bbca200..ef8925f7211a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -98,6 +98,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct cpu_map *own_cpus; struct thread_map *threads; unsigned int sample_size; int id_pos; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 41814547da15..fce6634aebe2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1438,7 +1438,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1447,7 +1447,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ea768625ab5b..eb0e7f8bf515 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -623,7 +623,7 @@ static int intel_bts_process_event(struct perf_session *session, if (err) return err; if (event->header.type == PERF_RECORD_EXIT) { - err = intel_bts_process_tid_exit(bts, event->comm.tid); + err = intel_bts_process_tid_exit(bts, event->fork.tid); if (err) return err; } diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 240730d682c1..2386322ece4f 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -4,6 +4,7 @@ inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 9e4eb8fcd559..d23138c06665 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -146,6 +146,9 @@ static void intel_pt_insn_decoder(struct insn *insn, case 4: intel_pt_insn->rel = bswap_32(insn->immediate.value); break; + default: + intel_pt_insn->rel = 0; + break; } #else intel_pt_insn->rel = insn->immediate.value; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bb41c20e6005..535d86f8e4d1 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1494,7 +1494,7 @@ static int intel_pt_process_event(struct perf_session *session, if (pt->timeless_decoding) { if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, - event->comm.tid, + event->fork.tid, sample->time); } } else if (timestamp) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d826e6f515db..21ed6ee63da9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -287,8 +287,8 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - if (cpus) - evsel->cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); if (name) evsel->name = strdup(name); @@ -1140,10 +1140,9 @@ int parse_events(struct perf_evlist *evlist, const char *str, ret = parse_events__scanner(str, &data, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { - int entries = data.idx - evlist->nr_entries; struct perf_evsel *last; - perf_evlist__splice_list_tail(evlist, &data.list, entries); + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; last = perf_evlist__last(evlist); last->cmdline_group_boundary = true; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a02b92..9cd70819c795 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -255,7 +255,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c new file mode 100644 index 000000000000..4f2c1c255d81 --- /dev/null +++ b/tools/perf/util/parse-regs-options.c @@ -0,0 +1,71 @@ +#include "perf.h" +#include "util/util.h" +#include "util/debug.h" +#include "util/parse-options.h" +#include "util/parse-regs-options.h" + +int +parse_regs(const struct option *opt, const char *str, int unset) +{ + uint64_t *mode = (uint64_t *)opt->value; + const struct sample_reg *r; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -I */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + if (!strcmp(s, "?")) { + fprintf(stderr, "available registers: "); + for (r = sample_reg_masks; r->name; r++) { + fprintf(stderr, "%s ", r->name); + } + fputc('\n', stderr); + /* just printing available regs */ + return -1; + } + for (r = sample_reg_masks; r->name; r++) { + if (!strcasecmp(s, r->name)) + break; + } + if (!r->name) { + ui__warning("unknown register %s," + " check man page\n", s); + goto error; + } + + *mode |= r->mask; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to all possible regs */ + if (*mode == 0) + *mode = PERF_REGS_MASK; +error: + free(os); + return ret; +} diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h new file mode 100644 index 000000000000..7d762b188007 --- /dev/null +++ b/tools/perf/util/parse-regs-options.h @@ -0,0 +1,5 @@ +#ifndef _PERF_PARSE_REGS_OPTIONS_H +#define _PERF_PARSE_REGS_OPTIONS_H 1 +struct option; +int parse_regs(const struct option *opt, const char *str, int unset); +#endif /* _PERF_PARSE_REGS_OPTIONS_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 43168fb0d9a2..885e8ac83997 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,6 +2,10 @@ #include "perf_regs.h" #include "event.h" +const struct sample_reg __weak sample_reg_masks[] = { + SMPL_REG_END +}; + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 980dbf76bc98..2984dcc54d67 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -5,6 +5,15 @@ struct regs_dump; +struct sample_reg { + const char *name; + uint64_t mask; +}; +#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG_END { .name = NULL } + +extern const struct sample_reg sample_reg_masks[]; + #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index 3d1537b93c64..e882c8320135 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -8,154 +8,20 @@ # as published by the Free Software Foundation; version 2 # of the License. -OUTPUT=./ -ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ -endif - -ifneq ($(OUTPUT),) -# check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) -$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) -endif - -SUBDIRS = tools/ec - -# --- CONFIGURATION BEGIN --- - -# Set the following to `true' to make a unstripped, unoptimized -# binary. Leave this set to `false' for production use. -DEBUG ?= true - -# make the build silent. Set this to something else to make it noisy again. -V ?= false - -# Prefix to the directories we're installing to -DESTDIR ?= - -# --- CONFIGURATION END --- - -# Directory definitions. These are default and most probably -# do not need to be changed. Please note that DESTDIR is -# added in front of any of them - -bindir ?= /usr/bin -sbindir ?= /usr/sbin -mandir ?= /usr/man - -# Toolchain: what tools do we use, and what options do they need: - -INSTALL = /usr/bin/install -c -INSTALL_PROGRAM = ${INSTALL} -INSTALL_DATA = ${INSTALL} -m 644 -INSTALL_SCRIPT = ${INSTALL_PROGRAM} - -# If you are running a cross compiler, you may want to set this -# to something more interesting, like "arm-linux-". If you want -# to compile vs uClibc, that can be done here as well. -CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- -CC = $(CROSS)gcc -LD = $(CROSS)gcc -STRIP = $(CROSS)strip -HOSTCC = gcc - -# check if compiler option is supported -cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} - -# use '-Os' optimization if available, else use -O2 -OPTIMIZATION := $(call cc-supports,-Os,-O2) - -WARNINGS := -Wall -WARNINGS += $(call cc-supports,-Wstrict-prototypes) -WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) - -KERNEL_INCLUDE := ../../../include -ACPICA_INCLUDE := ../../../drivers/acpi/acpica -CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) -CFLAGS += $(WARNINGS) - -ifeq ($(strip $(V)),false) - QUIET=@ - ECHO=@echo -else - QUIET= - ECHO=@\# -endif -export QUIET ECHO - -# if DEBUG is enabled, then we do not strip or optimize -ifeq ($(strip $(DEBUG)),true) - CFLAGS += -O1 -g -DDEBUG - STRIPCMD = /bin/true -Since_we_are_debugging -else - CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer - STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment -endif - -# --- ACPIDUMP BEGIN --- - -vpath %.c \ - ../../../drivers/acpi/acpica\ - tools/acpidump\ - common\ - os_specific/service_layers - -CFLAGS += -DACPI_DUMP_APP -Itools/acpidump - -DUMP_OBJS = \ - apdump.o\ - apfiles.o\ - apmain.o\ - osunixdir.o\ - osunixmap.o\ - osunixxf.o\ - tbprint.o\ - tbxfroot.o\ - utbuffer.o\ - utdebug.o\ - utexcep.o\ - utglobal.o\ - utmath.o\ - utprint.o\ - utstring.o\ - utxferror.o\ - oslibcfs.o\ - oslinuxtbl.o\ - cmfsize.o\ - getopt.o - -DUMP_OBJS := $(addprefix $(OUTPUT)tools/acpidump/,$(DUMP_OBJS)) - -$(OUTPUT)acpidump: $(DUMP_OBJS) - $(ECHO) " LD " $@ - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(DUMP_OBJS) -L$(OUTPUT) -o $@ - $(QUIET) $(STRIPCMD) $@ - -$(OUTPUT)tools/acpidump/%.o: %.c - $(ECHO) " CC " $@ - $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< - -# --- ACPIDUMP END --- - -all: $(OUTPUT)acpidump - echo $(OUTPUT) - -clean: - -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ - | xargs rm -f - -rm -f $(OUTPUT)acpidump - -install-tools: - $(INSTALL) -d $(DESTDIR)${sbindir} - $(INSTALL_PROGRAM) $(OUTPUT)acpidump $(DESTDIR)${sbindir} - -install-man: - $(INSTALL_DATA) -D man/acpidump.8 $(DESTDIR)${mandir}/man8/acpidump.8 - -install: all install-tools install-man - -uninstall: - - rm -f $(DESTDIR)${sbindir}/acpidump - - rm -f $(DESTDIR)${mandir}/man8/acpidump.8 - -.PHONY: all utils install-tools install-man install uninstall clean +include ../../scripts/Makefile.include + +all: acpidump ec +clean: acpidump_clean ec_clean +install: acpidump_install ec_install +uninstall: acpidump_uninstall ec_uninstall + +acpidump ec: FORCE + $(call descend,tools/$@,all) +acpidump_clean ec_clean: + $(call descend,tools/$(@:_clean=),clean) +acpidump_install ec_install: + $(call descend,tools/$(@:_install=),install) +acpidump_uninstall ec_uninstall: + $(call descend,tools/$(@:_uninstall=),uninstall) + +.PHONY: FORCE diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config new file mode 100644 index 000000000000..552af68d5414 --- /dev/null +++ b/tools/power/acpi/Makefile.config @@ -0,0 +1,92 @@ +# tools/power/acpi/Makefile.config - ACPI tool Makefile +# +# Copyright (c) 2015, Intel Corporation +# Author: Lv Zheng <lv.zheng@intel.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 +# of the License. + +include ../../../../scripts/Makefile.include + +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif + +# --- CONFIGURATION BEGIN --- + +# Set the following to `true' to make a unstripped, unoptimized +# binary. Leave this set to `false' for production use. +DEBUG ?= true + +# make the build silent. Set this to something else to make it noisy again. +V ?= false + +# Prefix to the directories we're installing to +DESTDIR ?= + +# --- CONFIGURATION END --- + +# Directory definitions. These are default and most probably +# do not need to be changed. Please note that DESTDIR is +# added in front of any of them + +bindir ?= /usr/bin +sbindir ?= /usr/sbin +mandir ?= /usr/man + +# Toolchain: what tools do we use, and what options do they need: + +INSTALL = /usr/bin/install -c +INSTALL_PROGRAM = ${INSTALL} +INSTALL_DATA = ${INSTALL} -m 644 +INSTALL_SCRIPT = ${INSTALL_PROGRAM} + +# If you are running a cross compiler, you may want to set this +# to something more interesting, like "arm-linux-". If you want +# to compile vs uClibc, that can be done here as well. +CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- +CC = $(CROSS)gcc +LD = $(CROSS)gcc +STRIP = $(CROSS)strip +HOSTCC = gcc + +# check if compiler option is supported +cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} + +# use '-Os' optimization if available, else use -O2 +OPTIMIZATION := $(call cc-supports,-Os,-O2) + +WARNINGS := -Wall +WARNINGS += $(call cc-supports,-Wstrict-prototypes) +WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) + +KERNEL_INCLUDE := ../../../include +ACPICA_INCLUDE := ../../../drivers/acpi/acpica +CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) +CFLAGS += $(WARNINGS) + +ifeq ($(strip $(V)),false) + QUIET=@ + ECHO=@echo +else + QUIET= + ECHO=@\# +endif + +# if DEBUG is enabled, then we do not strip or optimize +ifeq ($(strip $(DEBUG)),true) + CFLAGS += -O1 -g -DDEBUG + STRIPCMD = /bin/true -Since_we_are_debugging +else + CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer + STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment +endif diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules new file mode 100644 index 000000000000..ec87a9e562c0 --- /dev/null +++ b/tools/power/acpi/Makefile.rules @@ -0,0 +1,37 @@ +# tools/power/acpi/Makefile.rules - ACPI tool Makefile +# +# Copyright (c) 2015, Intel Corporation +# Author: Lv Zheng <lv.zheng@intel.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 +# of the License. + +$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE + $(ECHO) " LD " $@ + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@ + $(QUIET) $(STRIPCMD) $@ + +$(OUTPUT)%.o: %.c + $(ECHO) " CC " $@ + $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< + +all: $(OUTPUT)$(TOOL) +clean: + -find $(OUTPUT) \( -not -type d \) \ + -and \( -name '*~' -o -name '*.[oas]' \) \ + -type f -print \ + | xargs rm -f + -rm -f $(OUTPUT)$(TOOL) + +install-tools: + $(INSTALL) -d $(DESTDIR)${sbindir} + $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir} +uninstall-tools: + - rm -f $(DESTDIR)${sbindir}/$(TOOL) + +install: all install-tools $(EXTRA_INSTALL) +uninstall: uninstall-tools $(EXTRA_UNINSTALL) + +.PHONY: FORCE diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile new file mode 100644 index 000000000000..8d761576e91b --- /dev/null +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -0,0 +1,53 @@ +# tools/power/acpi/tools/acpidump/Makefile - ACPI tool Makefile +# +# Copyright (c) 2015, Intel Corporation +# Author: Lv Zheng <lv.zheng@intel.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 +# of the License. + +include ../../Makefile.config + +TOOL = acpidump +EXTRA_INSTALL = install-man +EXTRA_UNINSTALL = uninstall-man + +vpath %.c \ + ../../../../../drivers/acpi/acpica\ + ./\ + ../../common\ + ../../os_specific/service_layers +CFLAGS += -DACPI_DUMP_APP -I.\ + -I../../../../../drivers/acpi/acpica\ + -I../../../../../include +TOOL_OBJS = \ + apdump.o\ + apfiles.o\ + apmain.o\ + osunixdir.o\ + osunixmap.o\ + osunixxf.o\ + tbprint.o\ + tbxfroot.o\ + utbuffer.o\ + utdebug.o\ + utexcep.o\ + utglobal.o\ + utmath.o\ + utnonansi.o\ + utprint.o\ + utstring.o\ + utxferror.o\ + oslibcfs.o\ + oslinuxtbl.o\ + cmfsize.o\ + getopt.o + +include ../../Makefile.rules + +install-man: ../../man/acpidump.8 + $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8 +uninstall-man: + - rm -f $(DESTDIR)${mandir}/man8/acpidump.8 diff --git a/tools/power/acpi/tools/ec/Makefile b/tools/power/acpi/tools/ec/Makefile index b7b0b929bd32..75d8a127b6ee 100644 --- a/tools/power/acpi/tools/ec/Makefile +++ b/tools/power/acpi/tools/ec/Makefile @@ -1,22 +1,17 @@ -ec_access: ec_access.o - $(ECHO) " LD " $@ - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $< -o $@ - $(QUIET) $(STRIPCMD) $@ +# tools/power/acpi/tools/acpidump/Makefile - ACPI tool Makefile +# +# Copyright (c) 2015, Intel Corporation +# Author: Lv Zheng <lv.zheng@intel.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 +# of the License. -%.o: %.c - $(ECHO) " CC " $@ - $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< +include ../../Makefile.config -all: ec_access +TOOL = ec +TOOL_OBJS = \ + ec_access.o -install: - $(INSTALL) -d $(DESTDIR)${sbindir} - $(INSTALL_PROGRAM) ec_access $(DESTDIR)${sbindir} - -uninstall: - - rm -f $(DESTDIR)${sbindir}/ec_access - -clean: - -rm -f $(OUTPUT)ec_access - -.PHONY: all install uninstall +include ../../Makefile.rules diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index f656e585ed45..4e213576381e 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -17,6 +17,7 @@ #include "cpufreq.h" #include "helpers/helpers.h" +#include "helpers/sysfs.h" #define NORM_FREQ_LEN 32 @@ -318,6 +319,9 @@ int cmd_freq_set(int argc, char **argv) cpufreq_cpu_exists(cpu)) continue; + if (sysfs_is_cpu_online(cpu) != 1) + continue; + printf(_("Setting cpu: %d\n"), cpu); ret = do_one_cpu(cpu, &new_pol, freq, policychange); if (ret) { diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index c13120af519b..cea398c176e7 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -73,6 +73,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); + if (!cpu_top->core_info[cpu].is_online) + continue; if(sysfs_topology_read_file( cpu, "physical_package_id", diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 05b8fc38dc8b..622db685b4f9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -251,11 +251,6 @@ Although it is not guaranteed by the architecture, turbostat assumes that they count at TSC rate, which is true on all processors tested to date. .SH REFERENCES -"Intel® Turbo Boost Technology -in Intel® Coreâ„¢ Microarchitecture (Nehalem) Based Processors" -http://download.intel.com/design/processor/applnots/320354.pdf - -"Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide" http://www.intel.com/products/processor/manuals/ diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 323b65edfc97..9655cb49c7cb 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -372,7 +372,7 @@ void print_header(void) if (do_rapl & RAPL_GFX) outp += sprintf(outp, " GFX_J"); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, " RAM_W"); + outp += sprintf(outp, " RAM_J"); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, " PKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) @@ -1157,7 +1157,7 @@ dump_nhm_platform_info(void) get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); - fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); + fprintf(stderr, "cpu%d: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", @@ -1168,8 +1168,8 @@ dump_nhm_platform_info(void) ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", - msr, msr & 0x2 ? "EN" : "DIS"); + fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", + base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); return; } @@ -1182,7 +1182,7 @@ dump_hsw_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); - fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); + fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 8) & 0xFF; if (ratio) @@ -1204,7 +1204,7 @@ dump_ivt_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); + fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1256,7 +1256,7 @@ dump_nhm_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1312,8 +1312,8 @@ dump_knl_turbo_ratio_limits(void) get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", - msr); + fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + base_cpu, msr); /** * Turbo encoding in KNL is as follows: @@ -1371,7 +1371,7 @@ dump_nhm_cst_cfg(void) #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", @@ -1384,6 +1384,49 @@ dump_nhm_cst_cfg(void) return; } +static void +dump_config_tdp(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); + fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); + fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); + + get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); + fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); + if (msr) { + fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); + fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); + fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); + fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); + } + fprintf(stderr, ")\n"); + + get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); + fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); + if (msr) { + fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); + fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); + fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); + fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); + } + fprintf(stderr, ")\n"); + + get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); + fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); + if ((msr) & 0x3) + fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); + fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(stderr, ")\n"); + + get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); + fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); + fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF); + fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(stderr, ")\n"); +} + void free_all_buffers(void) { CPU_FREE(cpu_present_set); @@ -1873,6 +1916,36 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_config_tdp(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ + + case 0x3C: /* HSW */ + case 0x3F: /* HSX */ + case 0x45: /* HSW */ + case 0x46: /* HSW */ + case 0x3D: /* BDW */ + case 0x47: /* BDW */ + case 0x4F: /* BDX */ + case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + + case 0x57: /* Knights Landing */ + return 1; + default: + return 0; + } +} + static void dump_cstate_pstate_config_info(family, model) { @@ -1893,6 +1966,9 @@ dump_cstate_pstate_config_info(family, model) if (has_knl_turbo_ratio_limit(family, model)) dump_knl_turbo_ratio_limits(); + if (has_config_tdp(family, model)) + dump_config_tdp(); + dump_nhm_cst_cfg(); } @@ -3014,7 +3090,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.7 27-May, 2015" + fprintf(stderr, "turbostat version 4.7 17-June, 2015" " - Len Brown <lenb@kernel.org>\n"); } @@ -3042,7 +3118,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "C:c:Ddhi:JM:m:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", long_options, &option_index)) != -1) { switch (opt) { case 'C': diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index f56914c7929b..38b00ecb2ed5 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,9 +1,12 @@ -ldflags-y += --wrap=ioremap_wt ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=memremap ldflags-y += --wrap=devm_ioremap_nocache -ldflags-y += --wrap=ioremap_cache +ldflags-y += --wrap=devm_memremap +ldflags-y += --wrap=devm_memunmap ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap +ldflags-y += --wrap=memunmap +ldflags-y += --wrap=__devm_request_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region @@ -15,6 +18,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o nfit-y := $(ACPI_SRC)/nfit.o @@ -29,6 +33,9 @@ nd_btt-y += config_check.o nd_blk-y := $(NVDIMM_SRC)/blk.o nd_blk-y += config_check.o +nd_e820-y := $(NVDIMM_SRC)/e820.o +nd_e820-y += config_check.o + libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o @@ -37,7 +44,9 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o libnvdimm-y += $(NVDIMM_SRC)/region.o libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o libnvdimm-y += $(NVDIMM_SRC)/label.o +libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-y += config_check.o obj-m += test/ diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 64bfaa50831c..b7251314bbc0 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -80,23 +80,52 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, } EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); -void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) { - return __nfit_test_ioremap(offset, size, ioremap_cache); + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return nfit_res->buf + offset - nfit_res->res->start; + return devm_memremap(dev, offset, size, flags); } -EXPORT_SYMBOL(__wrap_ioremap_cache); +EXPORT_SYMBOL(__wrap_devm_memremap); -void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) +void *__wrap_memremap(resource_size_t offset, size_t size, + unsigned long flags) { - return __nfit_test_ioremap(offset, size, ioremap_nocache); + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return nfit_res->buf + offset - nfit_res->res->start; + return memremap(offset, size, flags); } -EXPORT_SYMBOL(__wrap_ioremap_nocache); +EXPORT_SYMBOL(__wrap_memremap); + +void __wrap_devm_memunmap(struct device *dev, void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return devm_memunmap(dev, addr); +} +EXPORT_SYMBOL(__wrap_devm_memunmap); -void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) { - return __nfit_test_ioremap(offset, size, ioremap_wt); + return __nfit_test_ioremap(offset, size, ioremap_nocache); } -EXPORT_SYMBOL(__wrap_ioremap_wt); +EXPORT_SYMBOL(__wrap_ioremap_nocache); void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) { @@ -117,9 +146,22 @@ void __wrap_iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(__wrap_iounmap); -struct resource *__wrap___request_region(struct resource *parent, - resource_size_t start, resource_size_t n, const char *name, - int flags) +void __wrap_memunmap(void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return memunmap(addr); +} +EXPORT_SYMBOL(__wrap_memunmap); + +static struct resource *nfit_test_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name, int flags) { struct nfit_test_resource *nfit_res; @@ -147,10 +189,29 @@ struct resource *__wrap___request_region(struct resource *parent, return res; } } + if (dev) + return __devm_request_region(dev, parent, start, n, name); return __request_region(parent, start, n, name, flags); } + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + return nfit_test_request_region(NULL, parent, start, n, name, flags); +} EXPORT_SYMBOL(__wrap___request_region); +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name) +{ + if (!dev) + return NULL; + return nfit_test_request_region(dev, parent, start, n, name, 0); +} +EXPORT_SYMBOL(__wrap___devm_request_region); + void __wrap___release_region(struct resource *parent, resource_size_t start, resource_size_t n) { diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index d0bdae40ccc9..021e6f97f33e 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -147,75 +147,153 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->config_size = LABEL_SIZE; + nd_cmd->max_xfer = SZ_4K; + + return 0; +} + +static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr + *nd_cmd, unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) + return -EINVAL; + + nd_cmd->status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(nd_cmd->out_buf, label + offset, len); + rc = buf_len - sizeof(*nd_cmd) - len; + + return rc; +} + +static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, + unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + u32 *status; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) + return -EINVAL; + + status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd); + *status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(label + offset, nd_cmd->in_buf, len); + rc = buf_len - sizeof(*nd_cmd) - (len + 4); + + return rc; +} + +static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->max_ars_out = 256; + nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + + return 0; +} + +static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + + return 0; +} + +static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->out_length = 256; + nd_cmd->num_records = 0; + nd_cmd->status = 0; + + return 0; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); - struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - int i, rc; + int i, rc = 0; - if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENOTTY; + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - /* lookup label space for the given dimm */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; + + /* lookup label space for the given dimm */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == + handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + rc = nfit_test_cmd_get_config_size(buf, buf_len); break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; + case ND_CMD_GET_CONFIG_DATA: + rc = nfit_test_cmd_get_config_data(buf, buf_len, + t->label[i]); + break; + case ND_CMD_SET_CONFIG_DATA: + rc = nfit_test_cmd_set_config_data(buf, buf_len, + t->label[i]); + break; + default: + return -ENOTTY; + } + } else { + if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + return -ENOTTY; - switch (cmd) { - case ND_CMD_GET_CONFIG_SIZE: { - struct nd_cmd_get_config_size *nd_cmd = buf; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - nd_cmd->status = 0; - nd_cmd->config_size = LABEL_SIZE; - nd_cmd->max_xfer = SZ_4K; - rc = 0; - break; - } - case ND_CMD_GET_CONFIG_DATA: { - struct nd_cmd_get_config_data_hdr *nd_cmd = buf; - unsigned int len, offset = nd_cmd->in_offset; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - if (offset >= LABEL_SIZE) - return -EINVAL; - if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) - return -EINVAL; - - nd_cmd->status = 0; - len = min(nd_cmd->in_length, LABEL_SIZE - offset); - memcpy(nd_cmd->out_buf, t->label[i] + offset, len); - rc = buf_len - sizeof(*nd_cmd) - len; - break; - } - case ND_CMD_SET_CONFIG_DATA: { - struct nd_cmd_set_config_hdr *nd_cmd = buf; - unsigned int len, offset = nd_cmd->in_offset; - u32 *status; - - if (buf_len < sizeof(*nd_cmd)) - return -EINVAL; - if (offset >= LABEL_SIZE) - return -EINVAL; - if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) - return -EINVAL; - - status = buf + nd_cmd->in_length + sizeof(*nd_cmd); - *status = 0; - len = min(nd_cmd->in_length, LABEL_SIZE - offset); - memcpy(t->label[i] + offset, nd_cmd->in_buf, len); - rc = buf_len - sizeof(*nd_cmd) - (len + 4); - break; - } - default: - return -ENOTTY; + switch (cmd) { + case ND_CMD_ARS_CAP: + rc = nfit_test_cmd_ars_cap(buf, buf_len); + break; + case ND_CMD_ARS_START: + rc = nfit_test_cmd_ars_start(buf, buf_len); + break; + case ND_CMD_ARS_STATUS: + rc = nfit_test_cmd_ars_status(buf, buf_len); + break; + default: + return -ENOTTY; + } } return rc; @@ -876,6 +954,9 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); nd_desc = &acpi_desc->nd_desc; nd_desc->ndctl = nfit_test_ctl; } @@ -948,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, lane = nd_region_acquire_lane(nd_region); if (rw) - memcpy(mmio->base + dpa, iobuf, len); - else - memcpy(iobuf, mmio->base + dpa, len); + memcpy(mmio->addr.base + dpa, iobuf, len); + else { + memcpy(iobuf, mmio->addr.base + dpa, len); + + /* give us some some coverage of the mmio_flush_range() API */ + mmio_flush_range(mmio->addr.base + dpa, len); + } nd_region_release_lane(nd_region, lane); return 0; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 24ae9e829e9a..89b05e2222c9 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,6 +6,7 @@ TARGETS += firmware TARGETS += ftrace TARGETS += futex TARGETS += kcmp +TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug TARGETS += mount @@ -20,8 +21,10 @@ ifneq (1, $(quicktest)) TARGETS += timers endif TARGETS += user +TARGETS += jumplabel TARGETS += vm TARGETS += x86 +TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or # "make quicktest=1 kselftest from top level Makefile @@ -71,7 +74,6 @@ ifdef INSTALL_PATH @# Ask all targets to install their files mkdir -p $(INSTALL_PATH) for TARGET in $(TARGETS); do \ - mkdir -p $(INSTALL_PATH)/$$TARGET ; \ make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ done; diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index 182235640209..d27108b4f208 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -1,22 +1,12 @@ # Taken from perf makefile uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) -ifeq ($(ARCH),i386) - ARCH := x86 -endif -ifeq ($(ARCH),x86_64) - ARCH := x86 -endif +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) - -all: ifeq ($(ARCH),x86) - gcc breakpoint_test.c -o breakpoint_test -else - echo "Not an x86 target, can't build breakpoints selftests" +TEST_PROGS := breakpoint_test endif -TEST_PROGS := breakpoint_test +all: include ../lib.mk diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore new file mode 100644 index 000000000000..b732dd0d4738 --- /dev/null +++ b/tools/testing/selftests/capabilities/.gitignore @@ -0,0 +1,2 @@ +test_execve +validate_cap diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile new file mode 100644 index 000000000000..8c8f0c1f0889 --- /dev/null +++ b/tools/testing/selftests/capabilities/Makefile @@ -0,0 +1,18 @@ +all: + +include ../lib.mk + +.PHONY: all clean + +TARGETS := validate_cap test_execve +TEST_PROGS := test_execve + +CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng + +all: $(TARGETS) + +clean: + $(RM) $(TARGETS) + +$(TARGETS): %: %.c + $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c new file mode 100644 index 000000000000..10a21a958aaf --- /dev/null +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -0,0 +1,427 @@ +#define _GNU_SOURCE + +#include <cap-ng.h> +#include <err.h> +#include <linux/capability.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdarg.h> +#include <sched.h> +#include <sys/mount.h> +#include <limits.h> +#include <libgen.h> +#include <malloc.h> +#include <sys/wait.h> +#include <sys/prctl.h> +#include <sys/stat.h> + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +static int nerrs; + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + err(1, "vsnprintf failed"); + } + if (buf_len >= sizeof(buf)) { + errx(1, "vsnprintf output truncated"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + err(1, "open of %s failed", filename); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + errx(1, "short write to %s", filename); + } else { + err(1, "write to %s failed", filename); + } + } + if (close(fd) != 0) { + err(1, "close of %s failed", filename); + } +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static bool create_and_enter_ns(uid_t inner_uid) +{ + uid_t outer_uid; + gid_t outer_gid; + int i; + bool have_outer_privilege; + + outer_uid = getuid(); + outer_gid = getgid(); + + /* + * TODO: If we're already root, we could skip creating the userns. + */ + + if (unshare(CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing global UIDs for tests\n"); + if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) + err(1, "PR_SET_KEEPCAPS"); + if (setresuid(inner_uid, inner_uid, -1) != 0) + err(1, "setresuid"); + + // Re-enable effective caps + capng_get_caps_process(); + for (i = 0; i < CAP_LAST_CAP; i++) + if (capng_have_capability(CAPNG_PERMITTED, i)) + capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + have_outer_privilege = true; + } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing a user namespace for tests\n"); + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid); + write_file("/proc/self/gid_map", "0 %d 1", outer_gid); + + have_outer_privilege = false; + } else { + errx(1, "must be root or be able to create a userns"); + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0) + err(1, "remount everything private"); + + return have_outer_privilege; +} + +static void chdir_to_tmpfs(void) +{ + char cwd[PATH_MAX]; + if (getcwd(cwd, sizeof(cwd)) != cwd) + err(1, "getcwd"); + + if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0) + err(1, "mount private tmpfs"); + + if (chdir(cwd) != 0) + err(1, "chdir to private tmpfs"); + + if (umount2(".", MNT_DETACH) != 0) + err(1, "detach private tmpfs"); +} + +static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) +{ + int from = openat(fromfd, fromname, O_RDONLY); + if (from == -1) + err(1, "open copy source"); + + int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700); + + while (true) { + char buf[4096]; + ssize_t sz = read(from, buf, sizeof(buf)); + if (sz == 0) + break; + if (sz < 0) + err(1, "read"); + + if (write(to, buf, sz) != sz) + err(1, "write"); /* no short writes on tmpfs */ + } + + close(from); + close(to); +} + +static bool fork_wait(void) +{ + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + return true; + } else if (child > 0) { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + + return false; + } else { + err(1, "fork"); + } +} + +static void exec_other_validate_cap(const char *name, + bool eff, bool perm, bool inh, bool ambient) +{ + execl(name, name, (eff ? "1" : "0"), + (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"), + NULL); + err(1, "execl"); +} + +static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) +{ + exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient); +} + +static int do_tests(int uid, const char *our_path) +{ + bool have_outer_privilege = create_and_enter_ns(uid); + + int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); + if (ourpath_fd == -1) + err(1, "open '%s'", our_path); + + chdir_to_tmpfs(); + + copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap"); + + if (have_outer_privilege) { + uid_t gid = getegid(); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidroot"); + if (chown("validate_cap_suidroot", 0, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidnonroot"); + if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidroot"); + if (chown("validate_cap_sgidroot", -1, 0) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidnonroot"); + if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) + err(1, "chmod"); +} + + capng_get_caps_process(); + + /* Make sure that i starts out clear */ + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (uid == 0) { + printf("[RUN]\tRoot => ep\n"); + if (fork_wait()) + exec_validate_cap(true, true, false, false); + } else { + printf("[RUN]\tNon-root => no caps\n"); + if (fork_wait()) + exec_validate_cap(false, false, false, false); + } + + printf("[OK]\tCheck cap_ambient manipulation rules\n"); + + /* We should not be able to add ambient caps yet. */ + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) { + if (errno == EINVAL) + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n"); + else + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) { + printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_CLEAR_ALL"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tDropping I should have dropped A\n"); + return 1; + } + + printf("[OK]\tBasic manipulation appears to work\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (uid == 0) { + printf("[RUN]\tRoot +i => eip\n"); + if (fork_wait()) + exec_validate_cap(true, true, true, false); + } else { + printf("[RUN]\tNon-root +i => i\n"); + if (fork_wait()) + exec_validate_cap(false, false, true, false); + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + printf("[RUN]\tUID %d +ia => eipa\n", uid); + if (fork_wait()) + exec_validate_cap(true, true, true, true); + + /* The remaining tests need real privilege */ + + if (!have_outer_privilege) { + printf("[SKIP]\tSUID/SGID tests (needs privilege)\n"); + goto done; + } + + if (uid == 0) { + printf("[RUN]\tRoot +ia, suidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidroot", + true, true, true, true); + + printf("[RUN]\tRoot +ia, suidnonroot => ip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidnonroot", + false, true, true, false); + + printf("[RUN]\tRoot +ia, sgidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, true); + + if (fork_wait()) { + printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, false); + } + + printf("[RUN]\tRoot +ia, sgidnonroot => eip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidnonroot", + true, true, true, false); + } else { + printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + exec_other_validate_cap("./validate_cap_sgidnonroot", + false, false, true, false); + + if (fork_wait()) { + printf("[RUN]\tNon-root +ia, sgidroot => i\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + false, false, true, false); + } + } + +done: + return nerrs ? 1 : 0; +} + +int main(int argc, char **argv) +{ + char *tmp1, *tmp2, *our_path; + + /* Find our path */ + tmp1 = strdup(argv[0]); + if (!tmp1) + err(1, "strdup"); + tmp2 = dirname(tmp1); + our_path = strdup(tmp2); + if (!our_path) + err(1, "strdup"); + free(tmp1); + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid == 0 +++\n"); + return do_tests(0, our_path); + } + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid != 0 +++\n"); + return do_tests(1, our_path); + } + + return nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c new file mode 100644 index 000000000000..dd3c45f7b23c --- /dev/null +++ b/tools/testing/selftests/capabilities/validate_cap.c @@ -0,0 +1,73 @@ +#include <cap-ng.h> +#include <err.h> +#include <linux/capability.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> +#include <sys/prctl.h> +#include <sys/auxv.h> + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19) +# define HAVE_GETAUXVAL +#endif + +static bool bool_arg(char **argv, int i) +{ + if (!strcmp(argv[i], "0")) + return false; + else if (!strcmp(argv[i], "1")) + return true; + else + errx(1, "wrong argv[%d]", i); +} + +int main(int argc, char **argv) +{ + const char *atsec = ""; + + /* + * Be careful just in case a setgid or setcapped copy of this + * helper gets out. + */ + + if (argc != 5) + errx(1, "wrong argc"); + +#ifdef HAVE_GETAUXVAL + if (getauxval(AT_SECURE)) + atsec = " (AT_SECURE is set)"; + else + atsec = " (AT_SECURE is not set)"; +#endif + + capng_get_caps_process(); + + if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { + printf("[FAIL]\tWrong effective state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { + printf("[FAIL]\tWrong permitted state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { + printf("[FAIL]\tWrong inheritable state%s\n", atsec); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { + printf("[FAIL]\tWrong ambient state%s\n", atsec); + return 1; + } + + printf("[OK]\tCapabilities after execve were correct\n"); + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index ee412bab7ed4..97f1c6742066 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -12,11 +12,14 @@ run_tests: all $(RUN_TESTS) define INSTALL_RULE - mkdir -p $(INSTALL_PATH) - @for TEST_DIR in $(TEST_DIRS); do\ - cp -r $$TEST_DIR $(INSTALL_PATH); \ - done; - install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ + mkdir -p $(INSTALL_PATH); \ + for TEST_DIR in $(TEST_DIRS); do \ + cp -r $$TEST_DIR $(INSTALL_PATH); \ + done; \ + echo "install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)"; \ + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES); \ + fi endef install: all diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore new file mode 100644 index 000000000000..020c44f49a9e --- /dev/null +++ b/tools/testing/selftests/membarrier/.gitignore @@ -0,0 +1 @@ +membarrier_test diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile new file mode 100644 index 000000000000..877a50355d7f --- /dev/null +++ b/tools/testing/selftests/membarrier/Makefile @@ -0,0 +1,11 @@ +CFLAGS += -g -I../../../../usr/include/ + +all: + $(CC) $(CFLAGS) membarrier_test.c -o membarrier_test + +TEST_PROGS := membarrier_test + +include ../lib.mk + +clean: + $(RM) membarrier_test diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c new file mode 100644 index 000000000000..dde312508007 --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -0,0 +1,121 @@ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/membarrier.h> +#include <asm-generic/unistd.h> +#include <sys/syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include "../kselftest.h" + +enum test_membarrier_status { + TEST_MEMBARRIER_PASS = 0, + TEST_MEMBARRIER_FAIL, + TEST_MEMBARRIER_SKIP, +}; + +static int sys_membarrier(int cmd, int flags) +{ + return syscall(__NR_membarrier, cmd, flags); +} + +static enum test_membarrier_status test_membarrier_cmd_fail(void) +{ + int cmd = -1, flags = 0; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong command should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_flags_fail(void) +{ + int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong flags should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_success(void) +{ + int cmd = MEMBARRIER_CMD_SHARED, flags = 0; + + if (sys_membarrier(cmd, flags) != 0) { + printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n", + strerror(errno)); + return TEST_MEMBARRIER_FAIL; + } + + printf("membarrier: MEMBARRIER_CMD_SHARED success.\n"); + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier(void) +{ + enum test_membarrier_status status; + + status = test_membarrier_cmd_fail(); + if (status) + return status; + status = test_membarrier_flags_fail(); + if (status) + return status; + status = test_membarrier_success(); + if (status) + return status; + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_query(void) +{ + int flags = 0, ret; + + printf("membarrier MEMBARRIER_CMD_QUERY "); + ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags); + if (ret < 0) { + printf("failed. %s.\n", strerror(errno)); + switch (errno) { + case ENOSYS: + /* + * It is valid to build a kernel with + * CONFIG_MEMBARRIER=n. However, this skips the tests. + */ + return TEST_MEMBARRIER_SKIP; + case EINVAL: + default: + return TEST_MEMBARRIER_FAIL; + } + } + if (!(ret & MEMBARRIER_CMD_SHARED)) { + printf("command MEMBARRIER_CMD_SHARED is not supported.\n"); + return TEST_MEMBARRIER_FAIL; + } + printf("syscall available.\n"); + return TEST_MEMBARRIER_PASS; +} + +int main(int argc, char **argv) +{ + switch (test_membarrier_query()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + switch (test_membarrier()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + + printf("membarrier: tests done!\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 08c2a36ef7a9..412459369686 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -19,6 +19,8 @@ * - PACKET_FANOUT_LB * - PACKET_FANOUT_CPU * - PACKET_FANOUT_ROLLOVER + * - PACKET_FANOUT_CBPF + * - PACKET_FANOUT_EBPF * * Todo: * - functionality: PACKET_FANOUT_FLAG_DEFRAG @@ -44,7 +46,9 @@ #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> +#include <linux/unistd.h> /* for __NR_bpf */ #include <linux/filter.h> +#include <linux/bpf.h> #include <linux/if_packet.h> #include <net/ethernet.h> #include <netinet/ip.h> @@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets) return fd; } +static void sock_fanout_set_ebpf(int fd) +{ + const int len_off = __builtin_offsetof(struct __sk_buff, len); + struct bpf_insn prog[] = { + { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, + { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, + { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, + { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, + { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, + { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + char log_buf[512]; + union bpf_attr attr; + int pfd; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = (unsigned long) prog; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.license = (unsigned long) "GPL"; + attr.log_buf = (unsigned long) log_buf, + attr.log_size = sizeof(log_buf), + attr.log_level = 1, + + pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (pfd < 0) { + perror("bpf"); + fprintf(stderr, "bpf verifier:\n%s\n", log_buf); + exit(1); + } + + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { + perror("fanout data ebpf"); + exit(1); + } + + if (close(pfd)) { + perror("close ebpf"); + exit(1); + } +} + static char *sock_fanout_open_ring(int fd) { struct tpacket_req req = { @@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd) ring = mmap(0, req.tp_block_size * req.tp_block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (!ring) { - fprintf(stderr, "packetsock ring mmap\n"); + if (ring == MAP_FAILED) { + perror("packetsock ring mmap"); exit(1); } @@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off, { const int expect0[] = { 0, 0 }; char *rings[2]; + uint8_t type = typeflags & 0xFF; int fds[2], fds_udp[2][2], ret; fprintf(stderr, "test: datapath 0x%hx\n", typeflags); @@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off, fprintf(stderr, "ERROR: failed open\n"); exit(1); } + if (type == PACKET_FANOUT_CBPF) + sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA); + else if (type == PACKET_FANOUT_EBPF) + sock_fanout_set_ebpf(fds[0]); + rings[0] = sock_fanout_open_ring(fds[0]); rings[1] = sock_fanout_open_ring(fds[1]); pair_udp_open(fds_udp[0], PORT_BASE); @@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off, /* Send data, but not enough to overflow a queue */ pair_udp_send(fds_udp[0], 15); - pair_udp_send(fds_udp[1], 5); + pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1); ret = sock_fanout_read(fds, rings, expect1); /* Send more data, overflow the queue */ - pair_udp_send(fds_udp[0], 15); + pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1); /* TODO: ensure consistent order between expect1 and expect2 */ ret |= sock_fanout_read(fds, rings, expect2); @@ -275,6 +330,7 @@ int main(int argc, char **argv) const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; + const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; int port_off = 2, tries = 5, ret; test_control_single(); @@ -296,6 +352,11 @@ int main(int argc, char **argv) ret |= test_datapath(PACKET_FANOUT_ROLLOVER, port_off, expect_rb[0], expect_rb[1]); + ret |= test_datapath(PACKET_FANOUT_CBPF, + port_off, expect_bpf[0], expect_bpf[1]); + ret |= test_datapath(PACKET_FANOUT_EBPF, + port_off, expect_bpf[0], expect_bpf[1]); + set_cpuaffinity(0); ret |= test_datapath(PACKET_FANOUT_CPU, port_off, expect_cpu0[0], expect_cpu0[1]); diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 37da54ac85a9..24bc7ec1be7d 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -30,6 +30,7 @@ #define DATA_LEN 100 #define DATA_CHAR 'a' +#define DATA_CHAR_1 'b' #define PORT_BASE 8000 @@ -37,29 +38,36 @@ # define __maybe_unused __attribute__ ((__unused__)) #endif -static __maybe_unused void pair_udp_setfilter(int fd) +static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) { struct sock_filter bpf_filter[] = { { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ - { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ - { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ - { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ - { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/ + { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/ { 0x06, 0, 0, 0x00000060 }, /* RET match */ { 0x06, 0, 0, 0x00000000 }, /* RET no match */ }; struct sock_fprog bpf_prog; + if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA) + bpf_filter[5].code = 0x16; /* RET A */ + bpf_prog.filter = bpf_filter; bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + if (setsockopt(fd, lvl, optnum, &bpf_prog, sizeof(bpf_prog))) { perror("setsockopt SO_ATTACH_FILTER"); exit(1); } } +static __maybe_unused void pair_udp_setfilter(int fd) +{ + sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER); +} + static __maybe_unused void pair_udp_open(int fds[], uint16_t port) { struct sockaddr_in saddr, daddr; @@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port) } } -static __maybe_unused void pair_udp_send(int fds[], int num) +static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload) { char buf[DATA_LEN], rbuf[DATA_LEN]; - memset(buf, DATA_CHAR, sizeof(buf)); + memset(buf, payload, sizeof(buf)); while (num--) { /* Should really handle EINTR and EAGAIN */ if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { @@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num) } } +static __maybe_unused void pair_udp_send(int fds[], int num) +{ + return pair_udp_send_char(fds, num, DATA_CHAR); +} + static __maybe_unused void pair_udp_close(int fds[]) { close(fds[0]); diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 41cc3ed66818..ee179e22308c 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,8 +2,9 @@ noarg: $(MAKE) -C ../ TEST_PROGS := hugetlb_vs_thp_test subpage_prot +TEST_FILES := tempfile -all: $(TEST_PROGS) tempfile +all: $(TEST_PROGS) $(TEST_FILES) $(TEST_PROGS): ../harness.c diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c5abe7fd7590..a004b4cce99e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -14,6 +14,7 @@ #include <linux/filter.h> #include <sys/prctl.h> #include <sys/ptrace.h> +#include <sys/types.h> #include <sys/user.h> #include <linux/prctl.h> #include <linux/ptrace.h> @@ -82,7 +83,13 @@ struct seccomp_data { }; #endif +#if __BYTE_ORDER == __LITTLE_ENDIAN #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) +#else +#error "wut? Unknown __BYTE_ORDER?!" +#endif #define SIBLING_EXIT_UNKILLED 0xbadbeef #define SIBLING_EXIT_FAILURE 0xbadface @@ -1199,6 +1206,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM regs[8] # define SYSCALL_RET regs[0] +#elif defined(__powerpc__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM gpr[0] +# define SYSCALL_RET gpr[3] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1232,7 +1243,7 @@ void change_syscall(struct __test_metadata *_metadata, ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); EXPECT_EQ(0, ret); -#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__powerpc__) { regs.SYSCALL_NUM = syscall; } @@ -1396,6 +1407,8 @@ TEST_F(TRACE_syscall, syscall_dropped) # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__powerpc__) +# define __NR_seccomp 358 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile new file mode 100644 index 000000000000..9cdadf37f114 --- /dev/null +++ b/tools/testing/selftests/static_keys/Makefile @@ -0,0 +1,8 @@ +# Makefile for static keys selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := test_static_keys.sh + +include ../lib.mk diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh new file mode 100644 index 000000000000..1261e3fa1e3a --- /dev/null +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Runs static keys kernel module tests + +if /sbin/modprobe -q test_static_key_base; then + if /sbin/modprobe -q test_static_keys; then + echo "static_key: ok" + /sbin/modprobe -q -r test_static_keys + /sbin/modprobe -q -r test_static_key_base + else + echo "static_keys: [FAIL]" + /sbin/modprobe -q -r test_static_key_base + fi +else + echo "static_key: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 231b9a031f6a..d36fab7d8ebd 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -4,14 +4,16 @@ CFLAGS = -Wall BINARIES = compaction_test BINARIES += hugepage-mmap BINARIES += hugepage-shm -BINARIES += hugetlbfstest BINARIES += map_hugetlb BINARIES += thuge-gen BINARIES += transhuge-stress +BINARIES += userfaultfd all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt +userfaultfd: userfaultfd.c + $(CC) $(CFLAGS) -O2 -o $@ $^ -lpthread TEST_PROGS := run_vmtests TEST_FILES := $(BINARIES) diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c deleted file mode 100644 index 02e1072ec187..000000000000 --- a/tools/testing/selftests/vm/hugetlbfstest.c +++ /dev/null @@ -1,86 +0,0 @@ -#define _GNU_SOURCE -#include <assert.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -typedef unsigned long long u64; - -static size_t length = 1 << 24; - -static u64 read_rss(void) -{ - char buf[4096], *s = buf; - int i, fd; - u64 rss; - - fd = open("/proc/self/statm", O_RDONLY); - assert(fd > 2); - memset(buf, 0, sizeof(buf)); - read(fd, buf, sizeof(buf) - 1); - for (i = 0; i < 1; i++) - s = strchr(s, ' ') + 1; - rss = strtoull(s, NULL, 10); - return rss << 12; /* assumes 4k pagesize */ -} - -static void do_mmap(int fd, int extra_flags, int unmap) -{ - int *p; - int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; - u64 before, after; - int ret; - - before = read_rss(); - p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); - assert(p != MAP_FAILED || - !"mmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before - length) < 0x40000 || - !"rss didn't grow as expected"); - if (!unmap) - return; - ret = munmap(p, length); - assert(!ret || !"munmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before) < 0x40000 || - !"rss didn't shrink as expected"); -} - -static int open_file(const char *path) -{ - int fd, err; - - unlink(path); - fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL - | O_LARGEFILE | O_CLOEXEC, 0600); - assert(fd > 2); - unlink(path); - err = ftruncate(fd, length); - assert(!err); - return fd; -} - -int main(void) -{ - int hugefd, fd; - - fd = open_file("/dev/shm/hugetlbhog"); - hugefd = open_file("/hugepages/hugetlbhog"); - - system("echo 100 > /proc/sys/vm/nr_hugepages"); - do_mmap(-1, MAP_ANONYMOUS, 1); - do_mmap(fd, 0, 1); - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1); - do_mmap(hugefd, 0, 1); - do_mmap(hugefd, MAP_HUGETLB, 1); - /* Leak the last one to test do_exit() */ - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0); - printf("oll korrekt.\n"); - return 0; -} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 49ece11ff7fd..9179ce8df485 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -75,10 +75,14 @@ else echo "[PASS]" fi +echo "NOTE: The above hugetlb tests provide minimal coverage. Use" +echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" +echo " hugetlb regression testing." + echo "--------------------" -echo "running hugetlbfstest" +echo "running userfaultfd" echo "--------------------" -./hugetlbfstest +./userfaultfd 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c new file mode 100644 index 000000000000..2c7cca6f26a4 --- /dev/null +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -0,0 +1,639 @@ +/* + * Stress userfaultfd syscall. + * + * Copyright (C) 2015 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * This test allocates two virtual areas and bounces the physical + * memory across the two virtual areas (from area_src to area_dst) + * using userfaultfd. + * + * There are three threads running per CPU: + * + * 1) one per-CPU thread takes a per-page pthread_mutex in a random + * page of the area_dst (while the physical page may still be in + * area_src), and increments a per-page counter in the same page, + * and checks its value against a verification region. + * + * 2) another per-CPU thread handles the userfaults generated by + * thread 1 above. userfaultfd blocking reads or poll() modes are + * exercised interleaved. + * + * 3) one last per-CPU thread transfers the memory in the background + * at maximum bandwidth (if not already transferred by thread + * 2). Each cpu thread takes cares of transferring a portion of the + * area. + * + * When all threads of type 3 completed the transfer, one bounce is + * complete. area_src and area_dst are then swapped. All threads are + * respawned and so the bounce is immediately restarted in the + * opposite direction. + * + * per-CPU threads 1 by triggering userfaults inside + * pthread_mutex_lock will also verify the atomicity of the memory + * transfer (UFFDIO_COPY). + * + * The program takes two parameters: the amounts of physical memory in + * megabytes (MiB) of the area and the number of bounces to execute. + * + * # 100MiB 99999 bounces + * ./userfaultfd 100 99999 + * + * # 1GiB 99 bounces + * ./userfaultfd 1000 99 + * + * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers + * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <signal.h> +#include <poll.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <pthread.h> +#include "../../../../include/uapi/linux/userfaultfd.h" + +#ifdef __x86_64__ +#define __NR_userfaultfd 323 +#elif defined(__i386__) +#define __NR_userfaultfd 374 +#elif defined(__powewrpc__) +#define __NR_userfaultfd 364 +#else +#error "missing __NR_userfaultfd definition" +#endif + +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; + +#define BOUNCE_RANDOM (1<<0) +#define BOUNCE_RACINGFAULTS (1<<1) +#define BOUNCE_VERIFY (1<<2) +#define BOUNCE_POLL (1<<3) +static int bounces; + +static unsigned long long *count_verify; +static int uffd, finished, *pipefd; +static char *area_src, *area_dst; +static char *zeropage; +pthread_attr_t attr; + +/* pthread_mutex_t starts at page offset 0 */ +#define area_mutex(___area, ___nr) \ + ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +#define area_count(___area, ___nr) \ + ((volatile unsigned long long *) ((unsigned long) \ + ((___area) + (___nr)*page_size + \ + sizeof(pthread_mutex_t) + \ + sizeof(unsigned long long) - 1) & \ + ~(unsigned long)(sizeof(unsigned long long) \ + - 1))) + +static int my_bcmp(char *str1, char *str2, size_t n) +{ + unsigned long i; + for (i = 0; i < n; i++) + if (str1[i] != str2[i]) + return 1; + return 0; +} + +static void *locking_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct random_data rand; + unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ + int32_t rand_nr; + unsigned long long count; + char randstate[64]; + unsigned int seed; + time_t start; + + if (bounces & BOUNCE_RANDOM) { + seed = (unsigned int) time(NULL) - bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + seed += cpu; + bzero(&rand, sizeof(rand)); + bzero(&randstate, sizeof(randstate)); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) + fprintf(stderr, "srandom_r error\n"), exit(1); + } else { + page_nr = -bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + page_nr += cpu * nr_pages_per_cpu; + } + + while (!finished) { + if (bounces & BOUNCE_RANDOM) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 1 error\n"), exit(1); + page_nr = rand_nr; + if (sizeof(page_nr) > sizeof(rand_nr)) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 2 error\n"), exit(1); + page_nr |= (((unsigned long) rand_nr) << 16) << + 16; + } + } else + page_nr += 1; + page_nr %= nr_pages; + + start = time(NULL); + if (bounces & BOUNCE_VERIFY) { + count = *area_count(area_dst, page_nr); + if (!count) + fprintf(stderr, + "page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + + + /* + * We can't use bcmp (or memcmp) because that + * returns 0 erroneously if the memory is + * changing under it (even if the end of the + * page is never changing and always + * different). + */ +#if 1 + if (!my_bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) + fprintf(stderr, + "my_bcmp page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); +#else + unsigned long loops; + + loops = 0; + /* uncomment the below line to test with mutex */ + /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ + while (!bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) { + loops += 1; + if (loops > 10) + break; + } + /* uncomment below line to test with mutex */ + /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ + if (loops) { + fprintf(stderr, + "page_nr %lu all zero thread %lu %p %lu\n", + page_nr, cpu, area_dst + page_nr * page_size, + loops); + if (loops > 10) + exit(1); + } +#endif + } + + pthread_mutex_lock(area_mutex(area_dst, page_nr)); + count = *area_count(area_dst, page_nr); + if (count != count_verify[page_nr]) { + fprintf(stderr, + "page_nr %lu memory corruption %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + } + count++; + *area_count(area_dst, page_nr) = count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + + if (time(NULL) - start > 1) + fprintf(stderr, + "userfault too slow %ld " + "possible false positive with overcommit\n", + time(NULL) - start); + } + + return NULL; +} + +static int copy_page(unsigned long offset) +{ + struct uffdio_copy uffdio_copy; + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_copy.dst = (unsigned long) area_dst + offset; + uffdio_copy.src = (unsigned long) area_src + offset; + uffdio_copy.len = page_size; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy.copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY error %Ld\n", + uffdio_copy.copy), exit(1); + } else if (uffdio_copy.copy != page_size) { + fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", + uffdio_copy.copy), exit(1); + } else + return 1; + return 0; +} + +static void *uffd_poll_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct pollfd pollfd[2]; + struct uffd_msg msg; + int ret; + unsigned long offset; + char tmp_chr; + unsigned long userfaults = 0; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + + for (;;) { + ret = poll(pollfd, 2, -1); + if (!ret) + fprintf(stderr, "poll error %d\n", ret), exit(1); + if (ret < 0) + perror("poll"), exit(1); + if (pollfd[1].revents & POLLIN) { + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) + fprintf(stderr, "read pipefd error\n"), + exit(1); + break; + } + if (!(pollfd[0].revents & POLLIN)) + fprintf(stderr, "pollfd[0].revents %d\n", + pollfd[0].revents), exit(1); + ret = read(uffd, &msg, sizeof(msg)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + perror("nonblocking read error"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + userfaults++; + } + return (void *)userfaults; +} + +pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *uffd_read_thread(void *arg) +{ + unsigned long *this_cpu_userfaults; + struct uffd_msg msg; + unsigned long offset; + int ret; + + this_cpu_userfaults = (unsigned long *) arg; + *this_cpu_userfaults = 0; + + pthread_mutex_unlock(&uffd_read_mutex); + /* from here cancellation is ok */ + + for (;;) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (ret < 0) + perror("blocking read error"), exit(1); + else + fprintf(stderr, "short read\n"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (bounces & BOUNCE_VERIFY && + msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + (*this_cpu_userfaults)++; + } + return (void *)NULL; +} + +static void *background_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + unsigned long page_nr; + + for (page_nr = cpu * nr_pages_per_cpu; + page_nr < (cpu+1) * nr_pages_per_cpu; + page_nr++) + copy_page(page_nr * page_size); + + return NULL; +} + +static int stress(unsigned long *userfaults) +{ + unsigned long cpu; + pthread_t locking_threads[nr_cpus]; + pthread_t uffd_threads[nr_cpus]; + pthread_t background_threads[nr_cpus]; + void **_userfaults = (void **) userfaults; + + finished = 0; + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pthread_create(&locking_threads[cpu], &attr, + locking_thread, (void *)cpu)) + return 1; + if (bounces & BOUNCE_POLL) { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_poll_thread, (void *)cpu)) + return 1; + } else { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_read_thread, + &_userfaults[cpu])) + return 1; + pthread_mutex_lock(&uffd_read_mutex); + } + if (pthread_create(&background_threads[cpu], &attr, + background_thread, (void *)cpu)) + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(background_threads[cpu], NULL)) + return 1; + + /* + * Be strict and immediately zap area_src, the whole area has + * been transferred already by the background treads. The + * area_src could then be faulted in in a racy way by still + * running uffdio_threads reading zeropages after we zapped + * area_src (but they're guaranteed to get -EEXIST from + * UFFDIO_COPY without writing zero pages into area_dst + * because the background threads already completed). + */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise"); + return 1; + } + + for (cpu = 0; cpu < nr_cpus; cpu++) { + char c; + if (bounces & BOUNCE_POLL) { + if (write(pipefd[cpu*2+1], &c, 1) != 1) { + fprintf(stderr, "pipefd write error\n"); + return 1; + } + if (pthread_join(uffd_threads[cpu], &_userfaults[cpu])) + return 1; + } else { + if (pthread_cancel(uffd_threads[cpu])) + return 1; + if (pthread_join(uffd_threads[cpu], NULL)) + return 1; + } + } + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + + return 0; +} + +static int userfaultfd_stress(void) +{ + void *area; + char *tmp_area; + unsigned long nr; + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + unsigned long cpu; + int uffd_flags; + unsigned long userfaults[nr_cpus]; + + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_src = area; + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_dst = area; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + fprintf(stderr, + "userfaultfd syscall not available in this kernel\n"); + return 1; + } + uffd_flags = fcntl(uffd, F_GETFD, NULL); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + fprintf(stderr, "UFFDIO_API\n"); + return 1; + } + if (uffdio_api.api != UFFD_API) { + fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); + return 1; + } + + count_verify = malloc(nr_pages * sizeof(unsigned long long)); + if (!count_verify) { + perror("count_verify"); + return 1; + } + + for (nr = 0; nr < nr_pages; nr++) { + *area_mutex(area_src, nr) = (pthread_mutex_t) + PTHREAD_MUTEX_INITIALIZER; + count_verify[nr] = *area_count(area_src, nr) = 1; + } + + pipefd = malloc(sizeof(int) * nr_cpus * 2); + if (!pipefd) { + perror("pipefd"); + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { + perror("pipe"); + return 1; + } + } + + if (posix_memalign(&area, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + zeropage = area; + bzero(zeropage, page_size); + + pthread_mutex_lock(&uffd_read_mutex); + + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 16*1024*1024); + + while (bounces--) { + unsigned long expected_ioctls; + + printf("bounces: %d, mode:", bounces); + if (bounces & BOUNCE_RANDOM) + printf(" rnd"); + if (bounces & BOUNCE_RACINGFAULTS) + printf(" racing"); + if (bounces & BOUNCE_VERIFY) + printf(" ver"); + if (bounces & BOUNCE_POLL) + printf(" poll"); + printf(", "); + fflush(stdout); + + if (bounces & BOUNCE_POLL) + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + else + fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + + /* register */ + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + return 1; + } + expected_ioctls = (1 << _UFFDIO_WAKE) | + (1 << _UFFDIO_COPY) | + (1 << _UFFDIO_ZEROPAGE); + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) { + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"); + return 1; + } + + /* + * The madvise done previously isn't enough: some + * uffd_thread could have read userfaults (one of + * those already resolved by the background thread) + * and it may be in the process of calling + * UFFDIO_COPY. UFFDIO_COPY will read the zapped + * area_src and it would map a zero page in it (of + * course such a UFFDIO_COPY is perfectly safe as it'd + * return -EEXIST). The problem comes at the next + * bounce though: that racing UFFDIO_COPY would + * generate zeropages in the area_src, so invalidating + * the previous MADV_DONTNEED. Without this additional + * MADV_DONTNEED those zeropages leftovers in the + * area_src would lead to -EEXIST failure during the + * next bounce, effectively leaving a zeropage in the + * area_dst. + * + * Try to comment this out madvise to see the memory + * corruption being caught pretty quick. + * + * khugepaged is also inhibited to collapse THP after + * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's + * required to MADV_DONTNEED here. + */ + if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise 2"); + return 1; + } + + /* bounce pass */ + if (stress(userfaults)) + return 1; + + /* unregister */ + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { + fprintf(stderr, "register failure\n"); + return 1; + } + + /* verification */ + if (bounces & BOUNCE_VERIFY) { + for (nr = 0; nr < nr_pages; nr++) { + if (my_bcmp(area_dst, + area_dst + nr * page_size, + sizeof(pthread_mutex_t))) { + fprintf(stderr, + "error mutex 2 %lu\n", + nr); + bounces = 0; + } + if (*area_count(area_dst, nr) != count_verify[nr]) { + fprintf(stderr, + "error area_count %Lu %Lu %lu\n", + *area_count(area_src, nr), + count_verify[nr], + nr); + bounces = 0; + } + } + } + + /* prepare next bounce */ + tmp_area = area_src; + area_src = area_dst; + area_dst = tmp_area; + + printf("userfaults:"); + for (cpu = 0; cpu < nr_cpus; cpu++) + printf(" %lu", userfaults[cpu]); + printf("\n"); + } + + return 0; +} + +int main(int argc, char **argv) +{ + if (argc < 3) + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + page_size = sysconf(_SC_PAGE_SIZE); + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) > + page_size) + fprintf(stderr, "Impossible to run this test\n"), exit(2); + nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / + nr_cpus; + if (!nr_pages_per_cpu) { + fprintf(stderr, "invalid MiB\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + bounces = atoi(argv[2]); + if (bounces <= 0) { + fprintf(stderr, "invalid bounces\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + nr_pages = nr_pages_per_cpu * nr_cpus; + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + nr_pages, nr_pages_per_cpu); + return userfaultfd_stress(); +} diff --git a/tools/testing/selftests/zram/Makefile b/tools/testing/selftests/zram/Makefile new file mode 100644 index 000000000000..29d80346e3eb --- /dev/null +++ b/tools/testing/selftests/zram/Makefile @@ -0,0 +1,9 @@ +all: + +TEST_PROGS := zram.sh +TEST_FILES := zram01.sh zram02.sh zram_lib.sh + +include ../lib.mk + +clean: + $(RM) err.log diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README new file mode 100644 index 000000000000..eb17917c8a3a --- /dev/null +++ b/tools/testing/selftests/zram/README @@ -0,0 +1,40 @@ +zram: Compressed RAM based block devices +---------------------------------------- +* Introduction + +The zram module creates RAM based block devices named /dev/zram<id> +(<id> = 0, 1, ...). Pages written to these disks are compressed and stored +in memory itself. These disks allow very fast I/O and compression provides +good amounts of memory savings. Some of the usecases include /tmp storage, +use as swap disks, various caches under /var and maybe many more :) + +Statistics for individual zram devices are exported through sysfs nodes at +/sys/block/zram<id>/ + +Kconfig required: +CONFIG_ZRAM=y +CONFIG_ZRAM_LZ4_COMPRESS=y +CONFIG_ZPOOL=y +CONFIG_ZSMALLOC=y + +ZRAM Testcases +-------------- +zram_lib.sh: create library with initialization/cleanup functions +zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02 + +Two functional tests: zram01 and zram02: +zram01.sh: creates general purpose ram disks with ext4 filesystems +zram02.sh: creates block device for swap + +Commands required for testing: + - bc + - dd + - free + - awk + - mkswap + - swapon + - swapoff + - mkfs/ mkfs.ext4 + +For more information please refer: +kernel-source-tree/Documentation/blockdev/zram.txt diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh new file mode 100755 index 000000000000..20de9a761269 --- /dev/null +++ b/tools/testing/selftests/zram/zram.sh @@ -0,0 +1,35 @@ +#!/bin/bash +TCID="zram.sh" + +check_prereqs() +{ + local msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi +} + +run_zram () { +echo "--------------------" +echo "running zram tests" +echo "--------------------" +./zram01.sh +echo "" +./zram02.sh +} + +check_prereqs + +# check zram module exists +MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko +if [ -f $MODULE_PATH ]; then + run_zram +elif [ -b /dev/zram0 ]; then + run_zram +else + echo "$TCID : No zram.ko module or /dev/zram0 device file not found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 +fi diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh new file mode 100755 index 000000000000..b9566a6478a9 --- /dev/null +++ b/tools/testing/selftests/zram/zram01.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test creates several zram devices with different filesystems on them. +# It fills each device with zeros and checks that compression works. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +TCID="zram01" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="2097152" # 2MB +zram_mem_limits="2M" +zram_filesystems="ext4" +zram_algs="lzo" + +zram_fill_fs() +{ + local mem_free0=$(free -m | awk 'NR==2 {print $4}') + + for i in $(seq 0 $(($dev_num - 1))); do + echo "fill zram$i..." + local b=0 + while [ true ]; do + dd conv=notrunc if=/dev/zero of=zram${i}/file \ + oflag=append count=1 bs=1024 status=none \ + > /dev/null 2>&1 || break + b=$(($b + 1)) + done + echo "zram$i can be filled with '$b' KB" + done + + local mem_free1=$(free -m | awk 'NR==2 {print $4}') + local used_mem=$(($mem_free0 - $mem_free1)) + + local total_size=0 + for sm in $zram_sizes; do + local s=$(echo $sm | sed 's/M//') + total_size=$(($total_size + $s)) + done + + echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" + + local v=$((100 * $total_size / $used_mem)) + + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + zram_cleanup + return + fi + + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" +} + +check_prereqs +zram_load +zram_max_streams +zram_compress_alg +zram_set_disksizes +zram_set_memlimit +zram_makefs +zram_mount + +zram_fill_fs +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh new file mode 100755 index 000000000000..74569b883737 --- /dev/null +++ b/tools/testing/selftests/zram/zram02.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test checks that we can create swap zram device. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +TCID="zram02" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="1048576" # 1M +zram_mem_limits="1M" + +check_prereqs +zram_load +zram_max_streams +zram_set_disksizes +zram_set_memlimit +zram_makeswap +zram_swapoff +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh new file mode 100755 index 000000000000..424e68ed1487 --- /dev/null +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -0,0 +1,232 @@ +#!/bin/sh +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +MODULE=0 +dev_makeswap=-1 +dev_mounted=-1 + +trap INT + +check_prereqs() +{ + local msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi +} + +zram_cleanup() +{ + echo "zram cleanup" + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i + done + + for i in $(seq 0 $dev_mounted); do + umount /dev/zram$i + done + + for i in $(seq 0 $(($dev_num - 1))); do + echo 1 > /sys/block/zram${i}/reset + rm -rf zram$i + done + +} + +zram_unload() +{ + if [ $MODULE -ne 0 ] ; then + echo "zram rmmod zram" + rmmod zram > /dev/null 2>&1 + fi +} + +zram_load() +{ + # check zram module exists + MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko + if [ -f $MODULE_PATH ]; then + MODULE=1 + echo "create '$dev_num' zram device(s)" + modprobe zram num_devices=$dev_num + if [ $? -ne 0 ]; then + echo "failed to insert zram module" + exit 1 + fi + + dev_num_created=$(ls /dev/zram* | wc -w) + + if [ "$dev_num_created" -ne "$dev_num" ]; then + echo "unexpected num of devices: $dev_num_created" + ERR_CODE=-1 + else + echo "zram load module successful" + fi + elif [ -b /dev/zram0 ]; then + echo "/dev/zram0 device file found: OK" + else + echo "ERROR: No zram.ko module or no /dev/zram0 device found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 + fi +} + +zram_max_streams() +{ + echo "set max_comp_streams to zram device(s)" + + local i=0 + for max_s in $zram_max_streams; do + local sys_path="/sys/block/zram${i}/max_comp_streams" + echo $max_s > $sys_path || \ + echo "FAIL failed to set '$max_s' to $sys_path" + sleep 1 + local max_streams=$(cat $sys_path) + + [ "$max_s" -ne "$max_streams" ] && \ + echo "FAIL can't set max_streams '$max_s', get $max_stream" + + i=$(($i + 1)) + echo "$sys_path = '$max_streams' ($i/$dev_num)" + done + + echo "zram max streams: OK" +} + +zram_compress_alg() +{ + echo "test that we can set compression algorithm" + + local algs=$(cat /sys/block/zram0/comp_algorithm) + echo "supported algs: $algs" + local i=0 + for alg in $zram_algs; do + local sys_path="/sys/block/zram${i}/comp_algorithm" + echo "$alg" > $sys_path || \ + echo "FAIL can't set '$alg' to $sys_path" + i=$(($i + 1)) + echo "$sys_path = '$alg' ($i/$dev_num)" + done + + echo "zram set compression algorithm: OK" +} + +zram_set_disksizes() +{ + echo "set disk size to zram device(s)" + local i=0 + for ds in $zram_sizes; do + local sys_path="/sys/block/zram${i}/disksize" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set disksizes: OK" +} + +zram_set_memlimit() +{ + echo "set memory limit to zram device(s)" + + local i=0 + for ds in $zram_mem_limits; do + local sys_path="/sys/block/zram${i}/mem_limit" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set memory limit: OK" +} + +zram_makeswap() +{ + echo "make swap with zram device(s)" + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + mkswap /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL mkswap /dev/zram$1 failed" + fi + + swapon /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapon /dev/zram$1 failed" + fi + + echo "done with /dev/zram$i" + dev_makeswap=$i + done + + echo "zram making zram mkswap and swapon: OK" +} + +zram_swapoff() +{ + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapoff /dev/zram$i failed" + fi + done + dev_makeswap=-1 + + echo "zram swapoff: OK" +} + +zram_makefs() +{ + local i=0 + for fs in $zram_filesystems; do + # if requested fs not supported default it to ext2 + which mkfs.$fs > /dev/null 2>&1 || fs=ext2 + + echo "make $fs filesystem on /dev/zram$i" + mkfs.$fs /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL failed to make $fs on /dev/zram$i" + fi + i=$(($i + 1)) + echo "zram mkfs.$fs: OK" + done +} + +zram_mount() +{ + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + echo "mount /dev/zram$i" + mkdir zram$i + mount /dev/zram$i zram$i > /dev/null || \ + echo "FAIL mount /dev/zram$i failed" + dev_mounted=$i + done + + echo "zram mount of zram device(s): OK" +} diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 8bdf16b8ba60..7f73fa32a590 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -57,23 +57,15 @@ * pagemap kernel ABI bits */ -#define PM_ENTRY_BYTES sizeof(uint64_t) -#define PM_STATUS_BITS 3 -#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) -#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) -#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) -#define PM_PSHIFT_BITS 6 -#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) -#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) -#define __PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) -#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) -#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) - -#define __PM_SOFT_DIRTY (1LL) -#define PM_PRESENT PM_STATUS(4LL) -#define PM_SWAP PM_STATUS(2LL) -#define PM_SOFT_DIRTY __PM_PSHIFT(__PM_SOFT_DIRTY) - +#define PM_ENTRY_BYTES 8 +#define PM_PFRAME_BITS 55 +#define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +#define PM_SOFT_DIRTY (1ULL << 55) +#define PM_MMAP_EXCLUSIVE (1ULL << 56) +#define PM_FILE (1ULL << 61) +#define PM_SWAP (1ULL << 62) +#define PM_PRESENT (1ULL << 63) /* * kernel page flags @@ -100,6 +92,8 @@ #define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 #define KPF_SLUB_DEBUG 51 +#define KPF_FILE 62 +#define KPF_MMAP_EXCLUSIVE 63 #define KPF_ALL_BITS ((uint64_t)~0ULL) #define KPF_HACKERS_BITS (0xffffULL << 32) @@ -149,6 +143,9 @@ static const char * const page_flag_names[] = { [KPF_SLOB_FREE] = "P:slob_free", [KPF_SLUB_FROZEN] = "A:slub_frozen", [KPF_SLUB_DEBUG] = "E:slub_debug", + + [KPF_FILE] = "F:file", + [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive", }; @@ -452,6 +449,10 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) if (pme & PM_SOFT_DIRTY) flags |= BIT(SOFTDIRTY); + if (pme & PM_FILE) + flags |= BIT(FILE); + if (pme & PM_MMAP_EXCLUSIVE) + flags |= BIT(MMAP_EXCLUSIVE); return flags; } |