From 5db58faf989f16d1d6a3d661aac616f9ca7932aa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 11 Nov 2016 10:55:11 -0800 Subject: bpf: Add tests for the LRU bpf_htab This patch has some unit tests and a test_lru_dist. The test_lru_dist reads in the numeric keys from a file. The files used here are generated by a modified fio-genzipf tool originated from the fio test suit. The sample data file can be found here: https://github.com/iamkafai/bpf-lru The zipf.* data files have 100k numeric keys and the key is also ranged from 1 to 100k. The test_lru_dist outputs the number of unique keys (nr_unique). F.e. The following means, 61239 of them is unique out of 100k keys. nr_misses means it cannot be found in the LRU map, so nr_misses must be >= nr_unique. test_lru_dist also simulates a perfect LRU map as a comparison: [root@arch-fb-vm1 ~]# ~/devshare/fb-kernel/linux/samples/bpf/test_lru_dist \ /root/zipf.100k.a1_01.out 4000 1 ... test_parallel_lru_dist (map_type:9 map_flags:0x0): task:0 BPF LRU: nr_unique:23093(/100000) nr_misses:31603(/100000) task:0 Perfect LRU: nr_unique:23093(/100000 nr_misses:34328(/100000) .... test_parallel_lru_dist (map_type:9 map_flags:0x2): task:0 BPF LRU: nr_unique:23093(/100000) nr_misses:31710(/100000) task:0 Perfect LRU: nr_unique:23093(/100000 nr_misses:34328(/100000) [root@arch-fb-vm1 ~]# ~/devshare/fb-kernel/linux/samples/bpf/test_lru_dist \ /root/zipf.100k.a0_01.out 40000 1 ... test_parallel_lru_dist (map_type:9 map_flags:0x0): task:0 BPF LRU: nr_unique:61239(/100000) nr_misses:67054(/100000) task:0 Perfect LRU: nr_unique:61239(/100000 nr_misses:66993(/100000) ... test_parallel_lru_dist (map_type:9 map_flags:0x2): task:0 BPF LRU: nr_unique:61239(/100000) nr_misses:67068(/100000) task:0 Perfect LRU: nr_unique:61239(/100000 nr_misses:66993(/100000) LRU map has also been added to map_perf_test: /* Global LRU */ [root@kerneltest003.31.prn1 ~]# for i in 1 4 8; do echo -n "$i cpus: "; \ ./map_perf_test 16 $i | awk '{r += $3}END{print r " updates"}'; done 1 cpus: 2934082 updates 4 cpus: 7391434 updates 8 cpus: 6500576 updates /* Percpu LRU */ [root@kerneltest003.31.prn1 ~]# for i in 1 4 8; do echo -n "$i cpus: "; \ ./map_perf_test 32 $i | awk '{r += $3}END{print r " updates"}'; done 1 cpus: 2896553 updates 4 cpus: 9766395 updates 8 cpus: 17460553 updates Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 2 + samples/bpf/map_perf_test_kern.c | 39 +++ samples/bpf/map_perf_test_user.c | 32 +++ samples/bpf/test_lru_dist.c | 538 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 611 insertions(+) create mode 100644 samples/bpf/test_lru_dist.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index ac87f9c068ae..f394ac616ed8 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -2,6 +2,7 @@ obj- := dummy.o # List of programs to build +hostprogs-y := test_lru_dist hostprogs-y += sock_example hostprogs-y += fds_example hostprogs-y += sockex1 @@ -28,6 +29,7 @@ hostprogs-y += trace_event hostprogs-y += sampleip hostprogs-y += tc_l2_redirect +test_lru_dist-objs := test_lru_dist.o libbpf.o sock_example-objs := sock_example.o libbpf.o fds_example-objs := bpf_load.o libbpf.o fds_example.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 311538e5a701..7ee1574c8ccf 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -19,6 +19,21 @@ struct bpf_map_def SEC("maps") hash_map = { .max_entries = MAX_ENTRIES, }; +struct bpf_map_def SEC("maps") lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") percpu_lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 10000, + .map_flags = BPF_F_NO_COMMON_LRU, +}; + struct bpf_map_def SEC("maps") percpu_hash_map = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), @@ -53,6 +68,7 @@ int stress_hmap(struct pt_regs *ctx) value = bpf_map_lookup_elem(&hash_map, &key); if (value) bpf_map_delete_elem(&hash_map, &key); + return 0; } @@ -96,5 +112,28 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) bpf_map_delete_elem(&percpu_hash_map_alloc, &key); return 0; } + +SEC("kprobe/sys_getpid") +int stress_lru_hmap_alloc(struct pt_regs *ctx) +{ + u32 key = bpf_get_prandom_u32(); + long val = 1; + + bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + + return 0; +} + +SEC("kprobe/sys_getppid") +int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx) +{ + u32 key = bpf_get_prandom_u32(); + long val = 1; + + bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 3147377e8fd3..9505b4d112f4 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -35,6 +35,8 @@ static __u64 time_get_ns(void) #define PERCPU_HASH_PREALLOC (1 << 1) #define HASH_KMALLOC (1 << 2) #define PERCPU_HASH_KMALLOC (1 << 3) +#define LRU_HASH_PREALLOC (1 << 4) +#define PERCPU_LRU_HASH_PREALLOC (1 << 5) static int test_flags = ~0; @@ -50,6 +52,30 @@ static void test_hash_prealloc(int cpu) cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); } +static void test_lru_hash_prealloc(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getpid); + printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", + cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +} + +static void test_percpu_lru_hash_prealloc(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getppid); + printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", + cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +} + static void test_percpu_hash_prealloc(int cpu) { __u64 start_time; @@ -105,6 +131,12 @@ static void loop(int cpu) if (test_flags & PERCPU_HASH_KMALLOC) test_percpu_hash_kmalloc(cpu); + + if (test_flags & LRU_HASH_PREALLOC) + test_lru_hash_prealloc(cpu); + + if (test_flags & PERCPU_LRU_HASH_PREALLOC) + test_percpu_lru_hash_prealloc(cpu); } static void run_perf_test(int tasks) diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c new file mode 100644 index 000000000000..2859977b7f37 --- /dev/null +++ b/samples/bpf/test_lru_dist.c @@ -0,0 +1,538 @@ +/* + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +static int nr_cpus; +static unsigned long long *dist_keys; +static unsigned int dist_key_counts; + +struct list_head { + struct list_head *next, *prev; +}; + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +static inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void __list_del_entry(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del_entry(list); + list_add(list, head); +} + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +struct pfect_lru_node { + struct list_head list; + unsigned long long key; +}; + +struct pfect_lru { + struct list_head list; + struct pfect_lru_node *free_nodes; + unsigned int cur_size; + unsigned int lru_size; + unsigned int nr_unique; + unsigned int nr_misses; + unsigned int total; + int map_fd; +}; + +static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, + unsigned int nr_possible_elems) +{ + lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(unsigned long long), + sizeof(struct pfect_lru_node *), + nr_possible_elems, 0); + assert(lru->map_fd != -1); + + lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); + assert(lru->free_nodes); + + INIT_LIST_HEAD(&lru->list); + lru->cur_size = 0; + lru->lru_size = lru_size; + lru->nr_unique = lru->nr_misses = lru->total = 0; +} + +static void pfect_lru_destroy(struct pfect_lru *lru) +{ + close(lru->map_fd); + free(lru->free_nodes); +} + +static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, + unsigned long long key) +{ + struct pfect_lru_node *node = NULL; + int seen = 0; + + lru->total++; + if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { + if (node) { + list_move(&node->list, &lru->list); + return 1; + } + seen = 1; + } + + if (lru->cur_size < lru->lru_size) { + node = &lru->free_nodes[lru->cur_size++]; + INIT_LIST_HEAD(&node->list); + } else { + struct pfect_lru_node *null_node = NULL; + + node = list_last_entry(&lru->list, + struct pfect_lru_node, + list); + bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); + } + + node->key = key; + list_move(&node->list, &lru->list); + + lru->nr_misses++; + if (seen) { + assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); + } else { + lru->nr_unique++; + assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); + } + + return seen; +} + +static unsigned int read_keys(const char *dist_file, + unsigned long long **keys) +{ + struct stat fst; + unsigned long long *retkeys; + unsigned int counts = 0; + int dist_fd; + char *b, *l; + int i; + + dist_fd = open(dist_file, 0); + assert(dist_fd != -1); + + assert(fstat(dist_fd, &fst) == 0); + b = malloc(fst.st_size); + assert(b); + + assert(read(dist_fd, b, fst.st_size) == fst.st_size); + close(dist_fd); + for (i = 0; i < fst.st_size; i++) { + if (b[i] == '\n') + counts++; + } + counts++; /* in case the last line has no \n */ + + retkeys = malloc(counts * sizeof(unsigned long long)); + assert(retkeys); + + counts = 0; + for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n")) + retkeys[counts++] = strtoull(l, NULL, 10); + free(b); + + *keys = retkeys; + + return counts; +} + +static int create_map(int map_type, int map_flags, unsigned int size) +{ + int map_fd; + + map_fd = bpf_create_map(map_type, sizeof(unsigned long long), + sizeof(unsigned long long), size, map_flags); + + if (map_fd == -1) + perror("bpf_create_map"); + + return map_fd; +} + +static int sched_next_online(int pid, int next_to_try) +{ + cpu_set_t cpuset; + + if (next_to_try == nr_cpus) + return -1; + + while (next_to_try < nr_cpus) { + CPU_ZERO(&cpuset); + CPU_SET(next_to_try++, &cpuset); + if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) + break; + } + + return next_to_try; +} + +static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data), + void *data) +{ + int next_sched_cpu = 0; + pid_t pid[tasks]; + int i; + + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + next_sched_cpu = sched_next_online(0, next_sched_cpu); + fn(i, data); + exit(0); + } else if (pid[i] == -1) { + printf("couldn't spawn #%d process\n", i); + exit(1); + } + /* It is mostly redundant and just allow the parent + * process to update next_shced_cpu for the next child + * process + */ + next_sched_cpu = sched_next_online(pid[i], next_sched_cpu); + } + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } +} + +static void do_test_lru_dist(int task, void *data) +{ + unsigned int nr_misses = 0; + struct pfect_lru pfect_lru; + unsigned long long key, value = 1234; + unsigned int i; + + unsigned int lru_map_fd = ((unsigned int *)data)[0]; + unsigned int lru_size = ((unsigned int *)data)[1]; + unsigned long long key_offset = task * dist_key_counts; + + pfect_lru_init(&pfect_lru, lru_size, dist_key_counts); + + for (i = 0; i < dist_key_counts; i++) { + key = dist_keys[i] + key_offset; + + pfect_lru_lookup_or_insert(&pfect_lru, key); + + if (!bpf_lookup_elem(lru_map_fd, &key, &value)) + continue; + + if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { + printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", + key, errno); + assert(0); + } + + nr_misses++; + } + + printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", + task, pfect_lru.nr_unique, dist_key_counts, nr_misses, + dist_key_counts); + printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", + task, pfect_lru.nr_unique, pfect_lru.total, + pfect_lru.nr_misses, pfect_lru.total); + + pfect_lru_destroy(&pfect_lru); + close(lru_map_fd); +} + +static void test_parallel_lru_dist(int map_type, int map_flags, + int nr_tasks, unsigned int lru_size) +{ + int child_data[2]; + int lru_map_fd; + + printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, + map_flags); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, + nr_cpus * lru_size); + else + lru_map_fd = create_map(map_type, map_flags, + nr_tasks * lru_size); + assert(lru_map_fd != -1); + + child_data[0] = lru_map_fd; + child_data[1] = lru_size; + + run_parallel(nr_tasks, do_test_lru_dist, child_data); + + close(lru_map_fd); +} + +static void test_lru_loss0(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + unsigned int old_unused_losses = 0; + unsigned int new_unused_losses = 0; + unsigned int used_losses = 0; + int map_fd; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, 0) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, 900 * nr_cpus); + else + map_fd = create_map(map_type, map_flags, 900); + + assert(map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= 1000; key++) { + int start_key, end_key; + + assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); + + start_key = 101; + end_key = min(key, 900); + + while (start_key <= end_key) { + bpf_lookup_elem(map_fd, &start_key, value); + start_key++; + } + } + + for (key = 1; key <= 1000; key++) { + if (bpf_lookup_elem(map_fd, &key, value)) { + if (key <= 100) + old_unused_losses++; + else if (key <= 900) + used_losses++; + else + new_unused_losses++; + } + } + + close(map_fd); + + printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) " + "newer-elem-losses:%d(/100)\n", + old_unused_losses, used_losses, new_unused_losses); +} + +static void test_lru_loss1(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int map_fd; + unsigned int nr_losses = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, 0) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, 1000 * nr_cpus); + else + map_fd = create_map(map_type, map_flags, 1000); + + assert(map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= 1000; key++) + assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); + + for (key = 1; key <= 1000; key++) { + if (bpf_lookup_elem(map_fd, &key, value)) + nr_losses++; + } + + close(map_fd); + + printf("nr_losses:%d(/1000)\n", nr_losses); +} + +static void do_test_parallel_lru_loss(int task, void *data) +{ + const unsigned int nr_stable_elems = 1000; + const unsigned int nr_repeats = 100000; + + int map_fd = *(int *)data; + unsigned long long stable_base; + unsigned long long key, value[nr_cpus]; + unsigned long long next_ins_key; + unsigned int nr_losses = 0; + unsigned int i; + + stable_base = task * nr_repeats * 2 + 1; + next_ins_key = stable_base; + value[0] = 1234; + for (i = 0; i < nr_stable_elems; i++) { + assert(bpf_update_elem(map_fd, &next_ins_key, value, + BPF_NOEXIST) == 0); + next_ins_key++; + } + + for (i = 0; i < nr_repeats; i++) { + int rn; + + rn = rand(); + + if (rn % 10) { + key = rn % nr_stable_elems + stable_base; + bpf_lookup_elem(map_fd, &key, value); + } else { + bpf_update_elem(map_fd, &next_ins_key, value, + BPF_NOEXIST); + next_ins_key++; + } + } + + key = stable_base; + for (i = 0; i < nr_stable_elems; i++) { + if (bpf_lookup_elem(map_fd, &key, value)) + nr_losses++; + key++; + } + + printf(" task:%d nr_losses:%u\n", task, nr_losses); +} + +static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) +{ + int map_fd; + + printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, + map_flags); + + /* Give 20% more than the active working set */ + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, + nr_cpus * (1000 + 200)); + else + map_fd = create_map(map_type, map_flags, + nr_tasks * (1000 + 200)); + + assert(map_fd != -1); + + run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd); + + close(map_fd); +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; + const char *dist_file; + int nr_tasks = 1; + int lru_size; + int f; + + if (argc < 4) { + printf("Usage: %s \n", + argv[0]); + return -1; + } + + dist_file = argv[1]; + lru_size = atoi(argv[2]); + nr_tasks = atoi(argv[3]); + + setbuf(stdout, NULL); + + assert(!setrlimit(RLIMIT_MEMLOCK, &r)); + + srand(time(NULL)); + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + assert(nr_cpus != -1); + printf("nr_cpus:%d\n\n", nr_cpus); + + nr_tasks = min(nr_tasks, nr_cpus); + + dist_key_counts = read_keys(dist_file, &dist_keys); + if (!dist_key_counts) { + printf("%s has no key\n", dist_file); + return -1; + } + + for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { + test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); + test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); + test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], + nr_tasks); + test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f], + nr_tasks, lru_size); + printf("\n"); + } + + free(dist_keys); + + return 0; +} -- cgit v1.2.3