From 5efde6d73d58ec1ba6e22cc0cbc89fbdb38e632c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 5 Apr 2023 17:17:24 -0700
Subject: KVM: selftests: Refactor stable TSC check to use TEST_REQUIRE()

Refactor the nested TSC scaling test's check on a stable system TSC to
use TEST_REQUIRE() to do the heavy lifting when the system doesn't have
a stable TSC.  Using a helper+TEST_REQUIRE() eliminates the need for
gotos and a custom message.

Cc: Hao Ge <gehao@kylinos.cn>
Cc: Vipin Sharma <vipinsh@google.com>
Link: https://lore.kernel.org/r/20230406001724.706668-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../kvm/x86_64/vmx_nested_tsc_scaling_test.c       | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index fa03c8d1ce4e..e710b6e7fb38 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -116,29 +116,21 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_DONE();
 }
 
-static void stable_tsc_check_supported(void)
+static bool system_has_stable_tsc(void)
 {
+	bool tsc_is_stable;
 	FILE *fp;
 	char buf[4];
 
 	fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
 	if (fp == NULL)
-		goto skip_test;
+		return false;
 
-	if (fgets(buf, sizeof(buf), fp) == NULL)
-		goto close_fp;
+	tsc_is_stable = fgets(buf, sizeof(buf), fp) &&
+			!strncmp(buf, "tsc", sizeof(buf));
 
-	if (strncmp(buf, "tsc", sizeof(buf)))
-		goto close_fp;
-
-	fclose(fp);
-	return;
-
-close_fp:
 	fclose(fp);
-skip_test:
-	print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
-	exit(KSFT_SKIP);
+	return tsc_is_stable;
 }
 
 int main(int argc, char *argv[])
@@ -156,7 +148,7 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
-	stable_tsc_check_supported();
+	TEST_REQUIRE(system_has_stable_tsc());
 
 	/*
 	 * We set L1's scale factor to be a random number from 2 to 10.
-- 
cgit v1.2.3


From 56f413f2cd373d6ed7c4ecb2e0e3e740cc2fdc8c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 17 Apr 2023 18:53:22 +0100
Subject: KVM: selftests: Fix spelling mistake "miliseconds" -> "milliseconds"

There is a spelling mistake in the help for the -p option. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20230417175322.53249-1-colin.i.king@gmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 251794f83719..7f36c32fa760 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -226,7 +226,7 @@ static void help(char *name)
 	puts("");
 	printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
 	puts("");
-	printf(" -p: The NX reclaim period in miliseconds.\n");
+	printf(" -p: The NX reclaim period in milliseconds.\n");
 	printf(" -t: The magic token to indicate environment setup is done.\n");
 	printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
 	puts("");
-- 
cgit v1.2.3


From ba125de35da5184c5325bef5c4c89f6928ce8875 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 27 Apr 2023 16:11:11 -0400
Subject: KVM: selftests: Setup vcpu_alias only for minor mode test

This fixes two things:

- Unbreaks MISSING mode test on anonymous memory type

- Prefault the alias memory before creating the uffd threads; otherwise
  the uffd thread timing is inaccurate when the guest memory size is
  large, because the prefault time gets counted in the total time.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20230427201112.2164776-2-peterx@redhat.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 2439c4043fed..9c18686b4f63 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -128,6 +128,7 @@ static void prefault_mem(void *alias, uint64_t len)
 
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
+	struct memstress_vcpu_args *vcpu_args;
 	struct test_params *p = arg;
 	struct uffd_desc **uffd_descs = NULL;
 	struct timespec start;
@@ -145,24 +146,24 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		    "Failed to allocate buffer for guest data pattern");
 	memset(guest_data_prototype, 0xAB, demand_paging_size);
 
+	if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+		for (i = 0; i < nr_vcpus; i++) {
+			vcpu_args = &memstress_args.vcpu_args[i];
+			prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
+				     vcpu_args->pages * memstress_args.guest_page_size);
+		}
+	}
+
 	if (p->uffd_mode) {
 		uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
 		TEST_ASSERT(uffd_descs, "Memory allocation failed");
-
 		for (i = 0; i < nr_vcpus; i++) {
-			struct memstress_vcpu_args *vcpu_args;
 			void *vcpu_hva;
-			void *vcpu_alias;
 
 			vcpu_args = &memstress_args.vcpu_args[i];
 
 			/* Cache the host addresses of the region */
 			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
-			vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
-
-			prefault_mem(vcpu_alias,
-				vcpu_args->pages * memstress_args.guest_page_size);
-
 			/*
 			 * Set up user fault fd to handle demand paging
 			 * requests.
-- 
cgit v1.2.3


From 21912a653d7dc9b79f3b7e9884179d7b7d593448 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 27 Apr 2023 16:11:12 -0400
Subject: KVM: selftests: Allow dumping per-vcpu info for uffd threads

There's one PER_VCPU_DEBUG in per-vcpu uffd threads but it's never hit.

Trigger it when the thread quits normally (via a kick on pollfd[1]), and
while at it fix the nanoseconds-to-seconds conversion in the rate
calculation.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20230427201112.2164776-3-peterx@redhat.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/lib/userfaultfd_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 92cef20902f1..271f63891581 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -70,7 +70,7 @@ static void *uffd_handler_thread_fn(void *arg)
 			r = read(pollfd[1].fd, &tmp_chr, 1);
 			TEST_ASSERT(r == 1,
 				    "Error reading pipefd in UFFD thread\n");
-			return NULL;
+			break;
 		}
 
 		if (!(pollfd[0].revents & POLLIN))
@@ -103,7 +103,7 @@ static void *uffd_handler_thread_fn(void *arg)
 	ts_diff = timespec_elapsed(start);
 	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
 		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
-		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
 
 	return NULL;
 }
-- 
cgit v1.2.3


From 07b4b2f4047f600ca7974797900b7409081f826c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 12 Apr 2023 16:09:13 -0400
Subject: KVM: selftests: touch all pages of args on each memstress iteration

Access the same memory addresses on each iteration of the memstress
guest code.  This ensures that the state of KVM's page tables
is the same after every iteration, including the pages that host the
guest page tables for args and vcpu_args.

This difference is visible when running the proposed
dirty_log_page_splitting_test[*] on AMD, or on Intel with pml=0 and
eptad=0.  The tests fail due to different semantics of dirty bits for
page-table pages on AMD (and eptad=0) and Intel.  Both AMD and Intel with
eptad=0 treat page-table accesses as writes, therefore more pages are
dropped before the repopulation phase when dirty logging is disabled.

The "missing" page had been included in the population phase because it
hosts the page tables for vcpu_args, but repopulation does not need it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Vipin Sharma <vipinsh@google.com>
Link: https://lore.kernel.org/r/20230412200913.1570873-1-pbonzini@redhat.com
[sean: add additional details in changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/lib/memstress.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 5f1d3173c238..7d2f812e7c9a 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -64,6 +64,9 @@ void memstress_guest_code(uint32_t vcpu_idx)
 	GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
 
 	while (true) {
+		for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size)
+			(void) *((volatile char *)args + i);
+
 		for (i = 0; i < pages; i++) {
 			if (args->random_access)
 				page = guest_random_u32(&rand_state) % pages;
-- 
cgit v1.2.3


From de10b798055db5df93474cdfa5dbffc57169f458 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 31 Jan 2023 18:18:19 +0000
Subject: KVM: selftests: Move dirty logging functions to memstress.(c|h)

Move some helper functions from dirty_log_perf_test.c to the memstress
library so that they can be used in a future commit which tests page
splitting during dirty logging.

Reviewed-by: Vipin Sharma <vipinsh@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Link: https://lore.kernel.org/r/20230131181820.179033-2-bgardon@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 84 ++---------------------
 tools/testing/selftests/kvm/include/memstress.h   |  8 +++
 tools/testing/selftests/kvm/lib/memstress.c       | 72 +++++++++++++++++++
 3 files changed, 87 insertions(+), 77 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e9d6d1aecf89..416719e20518 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -136,77 +136,6 @@ struct test_params {
 	bool random_access;
 };
 
-static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
-{
-	int i;
-
-	for (i = 0; i < slots; i++) {
-		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
-		int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
-
-		vm_mem_region_set_flags(vm, slot, flags);
-	}
-}
-
-static inline void enable_dirty_logging(struct kvm_vm *vm, int slots)
-{
-	toggle_dirty_logging(vm, slots, true);
-}
-
-static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
-{
-	toggle_dirty_logging(vm, slots, false);
-}
-
-static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
-{
-	int i;
-
-	for (i = 0; i < slots; i++) {
-		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
-
-		kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
-	}
-}
-
-static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
-			    int slots, uint64_t pages_per_slot)
-{
-	int i;
-
-	for (i = 0; i < slots; i++) {
-		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
-
-		kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
-	}
-}
-
-static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
-{
-	unsigned long **bitmaps;
-	int i;
-
-	bitmaps = malloc(slots * sizeof(bitmaps[0]));
-	TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
-
-	for (i = 0; i < slots; i++) {
-		bitmaps[i] = bitmap_zalloc(pages_per_slot);
-		TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
-	}
-
-	return bitmaps;
-}
-
-static void free_bitmaps(unsigned long *bitmaps[], int slots)
-{
-	int i;
-
-	for (i = 0; i < slots; i++)
-		free(bitmaps[i]);
-
-	free(bitmaps);
-}
-
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
@@ -236,7 +165,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 	pages_per_slot = host_num_pages / p->slots;
 
-	bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
+	bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);
 
 	if (dirty_log_manual_caps)
 		vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
@@ -277,7 +206,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Enable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	enable_dirty_logging(vm, p->slots);
+	memstress_enable_dirty_logging(vm, p->slots);
 	ts_diff = timespec_elapsed(start);
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -306,7 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
-		get_dirty_log(vm, bitmaps, p->slots);
+		memstress_get_dirty_log(vm, bitmaps, p->slots);
 		ts_diff = timespec_elapsed(start);
 		get_dirty_log_total = timespec_add(get_dirty_log_total,
 						   ts_diff);
@@ -315,7 +244,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 		if (dirty_log_manual_caps) {
 			clock_gettime(CLOCK_MONOTONIC, &start);
-			clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
+			memstress_clear_dirty_log(vm, bitmaps, p->slots,
+						  pages_per_slot);
 			ts_diff = timespec_elapsed(start);
 			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
 							     ts_diff);
@@ -334,7 +264,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Disable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	disable_dirty_logging(vm, p->slots);
+	memstress_disable_dirty_logging(vm, p->slots);
 	ts_diff = timespec_elapsed(start);
 	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -359,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 	}
 
-	free_bitmaps(bitmaps, p->slots);
+	memstress_free_bitmaps(bitmaps, p->slots);
 	arch_cleanup_vm(vm);
 	memstress_destroy_vm(vm);
 }
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index 72e3e358ef7b..ce4e603050ea 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -72,4 +72,12 @@ void memstress_guest_code(uint32_t vcpu_id);
 uint64_t memstress_nested_pages(int nr_vcpus);
 void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
 
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots);
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+			       int slots, uint64_t pages_per_slot);
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot);
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots);
+
 #endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 7d2f812e7c9a..df457452d146 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -5,6 +5,7 @@
 #define _GNU_SOURCE
 
 #include <inttypes.h>
+#include <linux/bitmap.h>
 
 #include "kvm_util.h"
 #include "memstress.h"
@@ -323,3 +324,74 @@ void memstress_join_vcpu_threads(int nr_vcpus)
 	for (i = 0; i < nr_vcpus; i++)
 		pthread_join(vcpu_threads[i].thread, NULL);
 }
+
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+		int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+		vm_mem_region_set_flags(vm, slot, flags);
+	}
+}
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+	toggle_dirty_logging(vm, slots, true);
+}
+
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+	toggle_dirty_logging(vm, slots, false);
+}
+
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+		kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+	}
+}
+
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+			       int slots, uint64_t pages_per_slot)
+{
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+		kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+	}
+}
+
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+	unsigned long **bitmaps;
+	int i;
+
+	bitmaps = malloc(slots * sizeof(bitmaps[0]));
+	TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+	for (i = 0; i < slots; i++) {
+		bitmaps[i] = bitmap_zalloc(pages_per_slot);
+		TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+	}
+
+	return bitmaps;
+}
+
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+	int i;
+
+	for (i = 0; i < slots; i++)
+		free(bitmaps[i]);
+
+	free(bitmaps);
+}
-- 
cgit v1.2.3


From dfa78a20cc879205b2c6239300dac09907ad3da1 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 31 Jan 2023 18:18:20 +0000
Subject: KVM: selftests: Add dirty logging page splitting test

Add a test for page splitting during dirty logging and for hugepage
recovery after dirty logging.

Page splitting represents non-trivial behavior, which is complicated
by MANUAL_PROTECT mode, which causes pages to be split on the first
clear, instead of when dirty logging is enabled.

Add a test which makes assertions about page counts to help define the
expected behavior of page splitting and to provide needed coverage of the
behavior. This also helps ensure that a failure in eager page splitting
is not covered up by splitting in the vCPU path.

Tested by running the test on an Intel Haswell machine w/wo
MANUAL_PROTECT.

Reviewed-by: Vipin Sharma <vipinsh@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Link: https://lore.kernel.org/r/20230131181820.179033-3-bgardon@google.com
[sean: let the user run without hugetlb, as suggested by Paolo]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../kvm/x86_64/dirty_log_page_splitting_test.c     | 259 +++++++++++++++++++++
 2 files changed, 260 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 7a5ff646e7e7..ee41ff0c5a86 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -61,6 +61,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
 # Compiled test targets
 TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
 TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
new file mode 100644
index 000000000000..beb7e2c10211
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+
+#define VCPUS		2
+#define SLOTS		2
+#define ITERATIONS	2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+	uint64_t pages_4k;
+	uint64_t pages_2m;
+	uint64_t pages_1g;
+	uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+	stats->pages_4k = vm_get_stat(vm, "pages_4k");
+	stats->pages_2m = vm_get_stat(vm, "pages_2m");
+	stats->pages_1g = vm_get_stat(vm, "pages_1g");
+	stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+	pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+		 stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+		 stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+	int i;
+
+	iteration++;
+	for (i = 0; i < VCPUS; i++) {
+		while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+		       iteration)
+			;
+	}
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+	int vcpu_idx = vcpu_args->vcpu_idx;
+
+	while (!READ_ONCE(host_quit)) {
+		int current_iteration = READ_ONCE(iteration);
+
+		vcpu_run(vcpu);
+
+		ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+		vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+		/* Wait for the start of the next iteration to be signaled. */
+		while (current_iteration == READ_ONCE(iteration) &&
+		       READ_ONCE(iteration) >= 0 &&
+		       !READ_ONCE(host_quit))
+			;
+	}
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+	struct kvm_vm *vm;
+	unsigned long **bitmaps;
+	uint64_t guest_num_pages;
+	uint64_t host_num_pages;
+	uint64_t pages_per_slot;
+	int i;
+	uint64_t total_4k_pages;
+	struct kvm_page_stats stats_populated;
+	struct kvm_page_stats stats_dirty_logging_enabled;
+	struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+	struct kvm_page_stats stats_clear_pass[ITERATIONS];
+	struct kvm_page_stats stats_dirty_logging_disabled;
+	struct kvm_page_stats stats_repopulated;
+
+	vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+				 SLOTS, backing_src, false);
+
+	guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+	pages_per_slot = host_num_pages / SLOTS;
+
+	bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+	if (dirty_log_manual_caps)
+		vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+			      dirty_log_manual_caps);
+
+	/* Start the iterations */
+	iteration = -1;
+	host_quit = false;
+
+	for (i = 0; i < VCPUS; i++)
+		vcpu_last_completed_iteration[i] = -1;
+
+	memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+	run_vcpu_iteration(vm);
+	get_page_stats(vm, &stats_populated, "populating memory");
+
+	/* Enable dirty logging */
+	memstress_enable_dirty_logging(vm, SLOTS);
+
+	get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+	while (iteration < ITERATIONS) {
+		run_vcpu_iteration(vm);
+		get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+			       "dirtying memory");
+
+		memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+		if (dirty_log_manual_caps) {
+			memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+			get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+		}
+	}
+
+	/* Disable dirty logging */
+	memstress_disable_dirty_logging(vm, SLOTS);
+
+	get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+	/* Run vCPUs again to fault pages back in. */
+	run_vcpu_iteration(vm);
+	get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+	/*
+	 * Tell the vCPU threads to quit.  No need to manually check that vCPUs
+	 * have stopped running after disabling dirty logging, the join will
+	 * wait for them to exit.
+	 */
+	host_quit = true;
+	memstress_join_vcpu_threads(VCPUS);
+
+	memstress_free_bitmaps(bitmaps, SLOTS);
+	memstress_destroy_vm(vm);
+
+	/* Make assertions about the page counts. */
+	total_4k_pages = stats_populated.pages_4k;
+	total_4k_pages += stats_populated.pages_2m * 512;
+	total_4k_pages += stats_populated.pages_1g * 512 * 512;
+
+	/*
+	 * Check that all huge pages were split. Since large pages can only
+	 * exist in the data slot, and the vCPUs should have dirtied all pages
+	 * in the data slot, there should be no huge pages left after splitting.
+	 * Splitting happens at dirty log enable time without
+	 * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+	 * with that capability.
+	 */
+	if (dirty_log_manual_caps) {
+		ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+		ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+		ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+	} else {
+		ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+		ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+	}
+
+	/*
+	 * Once dirty logging is disabled and the vCPUs have touched all their
+	 * memory again, the page counts should be the same as they were
+	 * right after initial population of memory.
+	 */
+	ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+	ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+	ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+	       name);
+	puts("");
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+	       "     (default: 1G)\n");
+	backing_src_help("-s");
+	puts("");
+}
+
+int main(int argc, char *argv[])
+{
+	int opt;
+
+	TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+	TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+	while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+		switch (opt) {
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
+		case 'h':
+			help(argv[0]);
+			exit(0);
+		case 's':
+			backing_src = parse_backing_src_type(optarg);
+			break;
+		default:
+			help(argv[0]);
+			exit(1);
+		}
+	}
+
+	if (!is_backing_src_hugetlb(backing_src)) {
+		pr_info("This test will only work reliably with HugeTLB memory. "
+			"It can work with THP, but that is best effort.\n");
+	}
+
+	guest_modes_append_default();
+
+	dirty_log_manual_caps = 0;
+	for_each_guest_mode(run_test, NULL);
+
+	dirty_log_manual_caps =
+		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+	if (dirty_log_manual_caps) {
+		dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+					  KVM_DIRTY_LOG_INITIALLY_SET);
+		for_each_guest_mode(run_test, NULL);
+	} else {
+		pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3


From 2c76131319982f9c9410bc12127ac1df4e810b87 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 26 May 2023 14:03:40 -0700
Subject: KVM: selftests: Extend cpuid_test to verify KVM_GET_CPUID2 "nent"
 updates

Verify that KVM reports the actual number of CPUID entries on success, but
doesn't touch the userspace struct on failure (which for better or worse,
is KVM's ABI).

Link: https://lore.kernel.org/r/20230526210340.2799158-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/x86_64/cpuid_test.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index 2fc3ad9c887e..d3c3aa93f090 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -163,6 +163,25 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 	ent->eax = eax;
 }
 
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+	int i, r;
+
+	vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+	TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+		    "KVM didn't update nent on success, wanted %u, got %u\n",
+		    vcpu->cpuid->nent, cpuid->nent);
+
+	for (i = 0; i < vcpu->cpuid->nent; i++) {
+		cpuid->nent = i;
+		r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+		TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+		TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+	}
+	free(cpuid);
+}
+
 int main(void)
 {
 	struct kvm_vcpu *vcpu;
@@ -183,5 +202,7 @@ int main(void)
 
 	set_cpuid_after_run(vcpu);
 
+	test_get_cpuid2(vcpu);
+
 	kvm_vm_free(vm);
 }
-- 
cgit v1.2.3


From d4ec586c60ab978554245c58cf432df444c93b4e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 6 Jun 2023 17:12:26 -0700
Subject: KVM: selftests: Allow specify physical cpu list in demand paging test

Mimic the dirty log test and allow the user to pin demand paging test
tasks to physical CPUs.

Put the help message into a general helper as suggested by Sean.

Signed-off-by: Peter Xu <peterx@redhat.com>
[sean: rebase, tweak arg ordering, add "print" to helper, print program name]
Link: https://lore.kernel.org/r/20230607001226.1398889-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c    | 15 +++++++++++++--
 tools/testing/selftests/kvm/dirty_log_perf_test.c   | 12 +-----------
 tools/testing/selftests/kvm/include/kvm_util_base.h |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c          | 17 +++++++++++++++++
 4 files changed, 32 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 9c18686b4f63..09c116a82a84 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -208,10 +208,11 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
-	       "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
+	       "          [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
 	guest_modes_help();
 	printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
 	       "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
+	kvm_print_vcpu_pinning_help();
 	printf(" -d: add a delay in usec to the User Fault\n"
 	       "     FD handler to simulate demand paging\n"
 	       "     overheads. Ignored without -u.\n");
@@ -229,6 +230,7 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	const char *cpulist = NULL;
 	struct test_params p = {
 		.src_type = DEFAULT_VM_MEM_SRC,
 		.partition_vcpu_memory_access = true,
@@ -237,7 +239,7 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
+	while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) {
 		switch (opt) {
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -264,6 +266,9 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
+		case 'c':
+			cpulist = optarg;
+			break;
 		case 'o':
 			p.partition_vcpu_memory_access = false;
 			break;
@@ -279,6 +284,12 @@ int main(int argc, char *argv[])
 		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
 	}
 
+	if (cpulist) {
+		kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu,
+				       nr_vcpus);
+		memstress_args.pin_vcpus = true;
+	}
+
 	for_each_guest_mode(run_test, &p);
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 416719e20518..d374dbcf9a53 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -332,17 +332,7 @@ static void help(char *name)
 	       "     so -w X means each page has an X%% chance of writing\n"
 	       "     and a (100-X)%% chance of reading.\n"
 	       "     (default: 100 i.e. all pages are written to.)\n");
-	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
-	       "     values (target pCPU), one for each vCPU, plus an optional\n"
-	       "     entry for the main application task (specified via entry\n"
-	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
-	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
-	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
-	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
-	       "         ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n"
-	       "     To leave the application task unpinned, drop the final entry:\n\n"
-	       "         ./dirty_log_perf_test -v 3 -c 22,23,24\n\n"
-	       "     (default: no pinning)\n");
+	kvm_print_vcpu_pinning_help();
 	puts("");
 	exit(0);
 }
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index a089c356f354..07732a157ccd 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -733,6 +733,7 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
 
 void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_print_vcpu_pinning_help(void);
 void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
 			    int nr_vcpus);
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 298c4372fb1a..9741a7ff6380 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -494,6 +494,23 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
 	return pcpu;
 }
 
+void kvm_print_vcpu_pinning_help(void)
+{
+	const char *name = program_invocation_name;
+
+	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
+	       "     values (target pCPU), one for each vCPU, plus an optional\n"
+	       "     entry for the main application task (specified via entry\n"
+	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
+	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+	       "         %s -v 3 -c 22,23,24,50\n\n"
+	       "     To leave the application task unpinned, drop the final entry:\n\n"
+	       "         %s -v 3 -c 22,23,24\n\n"
+	       "     (default: no pinning)\n", name, name);
+}
+
 void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
 			    int nr_vcpus)
 {
-- 
cgit v1.2.3


From 5ed19528db8ddcf0113d721f67a381be3e30c65a Mon Sep 17 00:00:00 2001
From: Yu Zhang <yu.c.zhang@linux.intel.com>
Date: Thu, 1 Jun 2023 16:03:38 +0800
Subject: KVM: selftests: Add new CFLAGS to generate dependency files

Add "-MD" in CFLAGS to generate dependency files. Currently, each
time a header file is updated in KVM selftest, we will have to run
"make clean && make" to rebuild the whole test suite. By adding new
compiling flags and dependency rules in the Makefile, we do not need to
make clean && make each time a header file is updated.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Link: https://lore.kernel.org/r/20230601080338.212942-1-yu.c.zhang@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/Makefile | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ee41ff0c5a86..de10581ea108 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -184,6 +184,8 @@ TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
 LIBKVM += $(LIBKVM_$(ARCH_DIR))
 
+OVERRIDE_TARGETS = 1
+
 # lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
 # importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
 # which causes the environment variable to override the makefile).
@@ -198,7 +200,7 @@ else
 LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-	-Wno-gnu-variable-sized-type-not-at-end \
+	-Wno-gnu-variable-sized-type-not-at-end -MD\
 	-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
 	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
 	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
@@ -225,7 +227,18 @@ LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
 LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
 LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
 
-EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+-include $(TEST_DEP_FILES)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
-- 
cgit v1.2.3


From e325ba2271849e25017743496bcec8d3a83cacb3 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Fri, 24 Mar 2023 15:54:24 +0100
Subject: KVM: s390: selftests: add selftest for CMMA migration

Add a selftest for CMMA migration on s390.

The tests cover:
- interaction of dirty tracking and migration mode, see my recent patch
  "KVM: s390: disable migration mode when dirty tracking is disabled" [1],
- several invalid calls of KVM_S390_GET_CMMA_BITS, for example: invalid
  flags, CMMA support off, with/without peeking
- ensure KVM_S390_GET_CMMA_BITS initially reports all pages as dirty,
- ensure KVM_S390_GET_CMMA_BITS properly skips over holes in memslots, but
  also non-dirty pages

Note that without the patch at [1] and the small fix in this series, the
selftests will fail.

[1] https://lore.kernel.org/all/20230127140532.230651-2-nrb@linux.ibm.com/

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Message-Id: <20230324145424.293889-3-nrb@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
[frankja@linux.ibm.com: squashed
20230606150510.671301-1-nrb@linux.ibm.com / "KVM: s390: selftests:
CMMA: don't run if CMMA not supported"]
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 tools/testing/selftests/kvm/Makefile          |   1 +
 tools/testing/selftests/kvm/s390x/cmma_test.c | 700 ++++++++++++++++++++++++++
 2 files changed, 701 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/s390x/cmma_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 7a5ff646e7e7..e0e5bf120326 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -163,6 +163,7 @@ TEST_GEN_PROGS_s390x = s390x/memop
 TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
+TEST_GEN_PROGS_s390x += s390x/cmma_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
new file mode 100644
index 000000000000..1d73e78e8fa7
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ *  Nico Boehr <nrb@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN 4096
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN 8192
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so use_cmma goes on and the CMMA related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+	asm volatile(
+		/* load TEST_DATA_START_GFN into r1 */
+		"	llilf 1,%[start_gfn]\n"
+		/* calculate the address from the gfn */
+		"	sllg 1,1,12(0)\n"
+		/* set the first page in TEST_DATA memslot to STABLE */
+		"	.insn rrf,0xb9ab0000,2,1,1,0\n"
+		/* hypercall */
+		"	diag 0,0,0x501\n"
+		"0:	j 0b"
+		:
+		: [start_gfn] "L"(TEST_DATA_START_GFN)
+		: "r1", "r2", "memory", "cc"
+	);
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+	asm volatile(
+		/* r1 = TEST_DATA_START_GFN */
+		"	xgr 1,1\n"
+		"	llilf 1,%[start_gfn]\n"
+		/* r5 = TEST_DATA_PAGE_COUNT */
+		"	lghi 5,%[page_count]\n"
+		/* r5 += r1 */
+		"2:	agfr 5,1\n"
+		/* r2 = r1 << 12 */
+		"1:	sllg 2,1,12(0)\n"
+		/* essa(r4, r2, SET_STABLE) */
+		"	.insn rrf,0xb9ab0000,4,2,1,0\n"
+		/* i++ */
+		"	agfi 1,1\n"
+		/* if r1 < r5 goto 1 */
+		"	cgrjl 1,5,1b\n"
+		/* hypercall */
+		"	diag 0,0,0x501\n"
+		"0:	j 0b"
+		:
+		: [start_gfn] "L"(TEST_DATA_START_GFN),
+		  [page_count] "L"(TEST_DATA_PAGE_COUNT)
+		:
+			/* the counter in our loop over the pages */
+			"r1",
+			/* the calculated page physical address */
+			"r2",
+			/* ESSA output register */
+			"r4",
+			/* last page */
+			"r5",
+			"cc", "memory"
+	);
+}
+
+static struct kvm_vm *create_vm(void)
+{
+	return ____vm_create(VM_MODE_DEFAULT);
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+	int i;
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+	/* set the array of memslots to zero like __vm_create does */
+	for (i = 0; i < NR_MEM_REGIONS; i++)
+		vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+	vm_userspace_mem_region_add(vm,
+				    VM_MEM_SRC_ANONYMOUS,
+				    TEST_DATA_START_GFN << vm->page_shift,
+				    TEST_DATA_MEMSLOT,
+				    TEST_DATA_PAGE_COUNT,
+				    0
+				   );
+	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+	/*
+	 * Our VM has the following memory layout:
+	 * +------+---------------------------+
+	 * | GFN  | Memslot                   |
+	 * +------+---------------------------+
+	 * | 0    |                           |
+	 * | ...  | MAIN (Code, Stack, ...)   |
+	 * | 511  |                           |
+	 * +------+---------------------------+
+	 * | 4096 |                           |
+	 * | ...  | TEST_DATA                 |
+	 * | 4607 |                           |
+	 * +------+---------------------------+
+	 */
+	create_main_memslot(vm);
+	create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+	struct userspace_mem_region *slot0;
+
+	kvm_vm_elf_load(vm, program_invocation_name);
+
+	slot0 = memslot2region(vm, 0);
+	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+	kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+	struct kvm_vm *vm;
+
+	vm = create_vm();
+
+	create_memslots(vm);
+
+	finish_vm_setup(vm);
+
+	return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+	int r;
+
+	r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+	TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+	vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+	vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+	return __kvm_device_attr_set(vm->fd,
+				     KVM_S390_VM_MIGRATION,
+				     KVM_S390_VM_MIGRATION_START,
+				     NULL
+				    );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+	int r = __enable_migration_mode(vm);
+
+	TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+	u64 out;
+	int r;
+
+	r = __kvm_device_attr_get(vm->fd,
+				  KVM_S390_VM_MIGRATION,
+				  KVM_S390_VM_MIGRATION_STATUS,
+				  &out
+				 );
+	TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+	return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+	struct kvm_s390_cmma_log args;
+	int rc;
+
+	errno = 0;
+
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = flags,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+	*errno_out = errno;
+	return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_vcpu *vcpu;
+	int rc, errno_out;
+
+	/* GET_CMMA_BITS without CMMA enabled should fail */
+	rc = vm_get_cmma_bits(vm, 0, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, ENXIO);
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+	vcpu_run(vcpu);
+
+	/* GET_CMMA_BITS without migration mode and without peeking should fail */
+	rc = vm_get_cmma_bits(vm, 0, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, EINVAL);
+
+	/* GET_CMMA_BITS without migration mode and with peeking should work */
+	rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(errno_out, 0);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	/* GET_CMMA_BITS with invalid flags */
+	rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_out, EINVAL);
+
+	kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+	ASSERT_EQ(vcpu->run->exit_reason, 13);
+	ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+	ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+	ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+	struct kvm_vm *vm = create_vm();
+	struct kvm_vcpu *vcpu;
+	u64 orig_psw;
+	int rc;
+
+	/* enabling migration mode on a VM without memory should fail */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno, EINVAL);
+	TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+	errno = 0;
+
+	create_memslots(vm);
+	finish_vm_setup(vm);
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+	orig_psw = vcpu->run->psw_addr;
+
+	/*
+	 * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/* migration mode when memslots have dirty tracking off should fail */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno, EINVAL);
+	TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+	errno = 0;
+
+	/* enable dirty tracking */
+	enable_dirty_tracking(vm);
+
+	/* enabling migration mode should work now */
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, 0);
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	errno = 0;
+
+	/* execute another ESSA instruction to see this goes fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/*
+	 * With migration mode on, create a new memslot with dirty tracking off.
+	 * This should turn off migration mode.
+	 */
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	vm_userspace_mem_region_add(vm,
+				    VM_MEM_SRC_ANONYMOUS,
+				    TEST_DATA_TWO_START_GFN << vm->page_shift,
+				    TEST_DATA_TWO_MEMSLOT,
+				    TEST_DATA_TWO_PAGE_COUNT,
+				    0
+				   );
+	TEST_ASSERT(!is_migration_mode_on(vm),
+		    "creating memslot without dirty tracking turns off migration mode"
+		   );
+
+	/* ESSA instructions should still execute fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	/*
+	 * Turn on dirty tracking on the new memslot.
+	 * It should be possible to turn migration mode back on again.
+	 */
+	vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+	rc = __enable_migration_mode(vm);
+	ASSERT_EQ(rc, 0);
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	errno = 0;
+
+	/*
+	 * Turn off dirty tracking again, this time with just a flag change.
+	 * Again, migration mode should turn off.
+	 */
+	TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+	vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+	TEST_ASSERT(!is_migration_mode_on(vm),
+		    "disabling dirty tracking should turn off migration mode"
+		   );
+
+	/* ESSA instructions should still execute fine */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslots, assert that the CMMA
+ * attributes of all pages in both memslots are dirty and nothing else is.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+	struct kvm_s390_cmma_log args;
+
+	/*
+	 * First iteration - everything should be dirty.
+	 * Start at the main memslot...
+	 */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+	ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+	ASSERT_EQ(args.start_gfn, 0);
+
+	/* ...and then - after a hole - the TEST_DATA memslot should follow */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = MAIN_PAGE_COUNT,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+	ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+	ASSERT_EQ(args.remaining, 0);
+
+	/* ...and nothing else should be there */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	ASSERT_EQ(args.count, 0);
+	ASSERT_EQ(args.start_gfn, 0);
+	ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+	struct kvm_s390_cmma_log args;
+
+	/* If we start from GFN 0 again, nothing should be dirty. */
+	args = (struct kvm_s390_cmma_log){
+		.start_gfn = 0,
+		.count = sizeof(cmma_value_buf),
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+	if (args.count || args.remaining || args.start_gfn)
+		TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+			  args.start_gfn,
+			  args.count,
+			  args.remaining
+			 );
+}
+
+static void test_get_inital_dirty(void)
+{
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_vcpu *vcpu;
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+	/*
+	 * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	assert_all_slots_cmma_dirty(vm);
+
+	/* Start from the beginning again and make sure nothing else is dirty */
+	assert_no_pages_cmma_dirty(vm);
+
+	kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+			     u64 start_gfn, u64 gfn_count,
+			     struct kvm_s390_cmma_log *res_out)
+{
+	*res_out = (struct kvm_s390_cmma_log){
+		.start_gfn = start_gfn,
+		.count = gfn_count,
+		.flags = 0,
+		.values = (__u64)&cmma_value_buf[0]
+	};
+	memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+	vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values, each of which is 0x0 (stable state).
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+			      u64 dirty_gfn_count,
+			      const struct kvm_s390_cmma_log *res)
+{
+	ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+	ASSERT_EQ(res->count, dirty_gfn_count);
+	for (size_t i = 0; i < dirty_gfn_count; i++)
+		ASSERT_EQ(cmma_value_buf[i], 0x0); /* stable state */
+	ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+	size_t gfn_offset;
+	struct kvm_vm *vm = create_vm_two_memslots();
+	struct kvm_s390_cmma_log log;
+	struct kvm_vcpu *vcpu;
+	u64 orig_psw;
+
+	enable_cmma(vm);
+	vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+	orig_psw = vcpu->run->psw_addr;
+
+	/*
+	 * Execute some essa instructions in the guest. Otherwise the guest will
+	 * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+	 */
+	vcpu_run(vcpu);
+	assert_exit_was_hypercall(vcpu);
+
+	enable_dirty_tracking(vm);
+	enable_migration_mode(vm);
+
+	/* un-dirty all pages */
+	assert_all_slots_cmma_dirty(vm);
+
+	/* Then, dirty just the TEST_DATA memslot */
+	vcpu->run->psw_addr = orig_psw;
+	vcpu_run(vcpu);
+
+	gfn_offset = TEST_DATA_START_GFN;
+	/**
+	 * Query CMMA attributes of one page, starting at page 0. Since the
+	 * main memslot was not touched by the VM, this should yield the first
+	 * page of the TEST_DATA memslot.
+	 * The dirty bitmap should now look like this:
+	 * 0: not dirty
+	 * [0x1, 0x200): dirty
+	 */
+	query_cmma_range(vm, 0, 1, &log);
+	assert_cmma_dirty(gfn_offset, 1, &log);
+	gfn_offset++;
+
+	/**
+	 * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+	 * memslot. This should wrap back to the beginning of the TEST_DATA
+	 * memslot, page 1.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x21): not dirty
+	 * [0x21, 0x200): dirty
+	 */
+	query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+	assert_cmma_dirty(gfn_offset, 0x20, &log);
+	gfn_offset += 0x20;
+
+	/* Skip 32 pages */
+	gfn_offset += 0x20;
+
+	/**
+	 * After skipping 32 pages, query the next 32 (0x20) pages.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x21): not dirty
+	 * [0x21, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	query_cmma_range(vm, gfn_offset, 0x20, &log);
+	assert_cmma_dirty(gfn_offset, 0x20, &log);
+	gfn_offset += 0x20;
+
+	/**
+	 * Query 1 page from the beginning of the TEST_DATA memslot. This should
+	 * yield page 0x21.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x22): not dirty
+	 * [0x22, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+	assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+	gfn_offset++;
+
+	/**
+	 * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+	 * This should yield pages [0x23, 0x32).
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x22): not dirty
+	 * 0x22: dirty
+	 * [0x23, 0x32): not dirty
+	 * [0x32, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x23;
+	query_cmma_range(vm, gfn_offset, 15, &log);
+	assert_cmma_dirty(gfn_offset, 15, &log);
+
+	/**
+	 * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+	 * This should yield page [0x22, 0x33)
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x33): not dirty
+	 * [0x33, 0x41): dirty
+	 * [0x41, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x22;
+	query_cmma_range(vm, gfn_offset, 17, &log);
+	assert_cmma_dirty(gfn_offset, 17, &log);
+
+	/**
+	 * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+	 * This should yield page 0x40 and nothing more, since there are more
+	 * than 16 non-dirty pages after page 0x40.
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x33): not dirty
+	 * [0x33, 0x40): dirty
+	 * [0x40, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x40;
+	query_cmma_range(vm, gfn_offset, 25, &log);
+	assert_cmma_dirty(gfn_offset, 1, &log);
+
+	/**
+	 * Query pages [0x33, 0x40).
+	 * The dirty bitmap should now look like this:
+	 * [0, 0x61): not dirty
+	 * [0x61, 0x200): dirty
+	 */
+	gfn_offset = TEST_DATA_START_GFN + 0x33;
+	query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+	assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+	/**
+	 * Query the remaining pages [0x61, 0x200).
+	 */
+	gfn_offset = TEST_DATA_START_GFN;
+	query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+	assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+	assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+	const char *name;
+	void (*test)(void);
+} testlist[] = {
+	{ "migration mode and dirty tracking", test_migration_mode },
+	{ "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+	{ "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+	{ "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+	struct kvm_vm *vm = create_vm();
+	int r;
+
+	r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+	kvm_vm_free(vm);
+
+	return r;
+}
+
+int main(int argc, char *argv[])
+{
+	int idx;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+	TEST_REQUIRE(machine_has_cmma());
+
+	ksft_print_header();
+
+	ksft_set_plan(ARRAY_SIZE(testlist));
+
+	for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+		testlist[idx].test();
+		ksft_test_result_pass("%s\n", testlist[idx].name);
+	}
+
+	ksft_finished();	/* Print results and exit() accordingly */
+}
-- 
cgit v1.2.3