author     Vitaly Kuznetsov <vkuznets@redhat.com>           2018-03-04 22:17:21 -0700
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2018-03-06 09:57:17 -0800
commit     bba072d1627222325e79b7b8e6c0847e4a32d96c
tree       ad0180b4990be6a6003075e0c08e1004df681542 /drivers/hv
parent     4f098af514d2608b74025a27faecc24cce238b71
hv_balloon: fix bugs in num_pages_onlined accounting
Our num_pages_onlined accounting is buggy:
1) In case we're offlining a memory block which was present at boot (e.g.
when there was no hotplug at all), we subtract 32k from 0 and, as
num_pages_onlined is unsigned, we get a very big positive number (see the
sketch below this list).
2) Commit 6df8d9aaf3af ("Drivers: hv: balloon: Correctly update onlined
page count") made the num_pages_onlined counter accurate on onlining but
totally incorrect on offlining for partly populated regions: no matter
how many pages were onlined and what was actually added to the
num_pages_onlined counter, we always subtract the full region (32k), so
again num_pages_onlined can wrap around zero. Onlining/offlining the same
partly populated region multiple times makes the situation worse.
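
The wrap-around in both cases is ordinary C unsigned arithmetic; a minimal
user-space sketch (the 32768 stands in for the 32k pages mentioned above,
and the printed value assumes a 64-bit unsigned long):

#include <stdio.h>

int main(void)
{
        /*
         * Models the buggy MEM_OFFLINE path: num_pages_onlined is unsigned,
         * so subtracting 32k pages that were never counted as onlined wraps
         * around to a huge positive value instead of going negative.
         */
        unsigned long num_pages_onlined = 0;

        num_pages_onlined -= 32768;
        /* Prints 18446744073709518848 where unsigned long is 64 bits. */
        printf("%lu\n", num_pages_onlined);
        return 0;
}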
Solve these issues by doing accurate accounting on offlining: walk the HAS
list, check for the covered range and gaps (a simplified user-space model
of this walk follows the diff below).
Fixes: 6df8d9aaf3af ("Drivers: hv: balloon: Correctly update onlined page count")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/hv')
-rw-r--r--  drivers/hv/hv_balloon.c | 82
1 file changed, 69 insertions(+), 13 deletions(-)
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 5b8e1ad1bcfe..7514a32d0de4 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -576,11 +576,65 @@ static struct hv_dynmem_device dm_device;
 static void post_status(struct hv_dynmem_device *dm);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
+                                     unsigned long pfn)
+{
+        struct hv_hotadd_gap *gap;
+
+        /* The page is not backed. */
+        if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+                return false;
+
+        /* Check for gaps. */
+        list_for_each_entry(gap, &has->gap_list, list) {
+                if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
+                        return false;
+        }
+
+        return true;
+}
+
+static unsigned long hv_page_offline_check(unsigned long start_pfn,
+                                           unsigned long nr_pages)
+{
+        unsigned long pfn = start_pfn, count = 0;
+        struct hv_hotadd_state *has;
+        bool found;
+
+        while (pfn < start_pfn + nr_pages) {
+                /*
+                 * Search for HAS which covers the pfn and when we find one
+                 * count how many consecutive PFNs are covered.
+                 */
+                found = false;
+                list_for_each_entry(has, &dm_device.ha_region_list, list) {
+                        while ((pfn >= has->start_pfn) &&
+                               (pfn < has->end_pfn) &&
+                               (pfn < start_pfn + nr_pages)) {
+                                found = true;
+                                if (has_pfn_is_backed(has, pfn))
+                                        count++;
+                                pfn++;
+                        }
+                }
+
+                /*
+                 * This PFN is not in any HAS (e.g. we're offlining a region
+                 * which was present at boot), no need to account for it. Go
+                 * to the next one.
+                 */
+                if (!found)
+                        pfn++;
+        }
+
+        return count;
+}
+
 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
                               void *v)
 {
         struct memory_notify *mem = (struct memory_notify *)v;
-        unsigned long flags;
+        unsigned long flags, pfn_count;
 
         switch (val) {
         case MEM_ONLINE:
@@ -593,7 +647,19 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 
         case MEM_OFFLINE:
                 spin_lock_irqsave(&dm_device.ha_lock, flags);
-                dm_device.num_pages_onlined -= mem->nr_pages;
+                pfn_count = hv_page_offline_check(mem->start_pfn,
+                                                  mem->nr_pages);
+                if (pfn_count <= dm_device.num_pages_onlined) {
+                        dm_device.num_pages_onlined -= pfn_count;
+                } else {
+                        /*
+                         * We're offlining more pages than we managed to
+                         * online. This is unexpected. In any case don't let
+                         * num_pages_onlined wrap around zero.
+                         */
+                        WARN_ON_ONCE(1);
+                        dm_device.num_pages_onlined = 0;
+                }
                 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
                 break;
         case MEM_GOING_ONLINE:
@@ -612,19 +678,9 @@ static struct notifier_block hv_memory_nb = {
 /* Check if the particular page is backed and can be onlined and online it. */
 static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
 {
-        struct hv_hotadd_gap *gap;
-        unsigned long pfn = page_to_pfn(pg);
-
-        /* The page is not backed. */
-        if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
+        if (!has_pfn_is_backed(has, page_to_pfn(pg)))
                 return;
 
-        /* Check for gaps. */
-        list_for_each_entry(gap, &has->gap_list, list) {
-                if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
-                        return;
-        }
-
         /* This frame is currently backed; online the page. */
         __online_page_set_limits(pg);
         __online_page_increment_counters(pg);
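
To make the new accounting walk easy to experiment with outside a kernel
tree, here is a self-contained user-space model of it. The struct layouts
and the names pfn_is_backed/offline_check are simplified stand-ins for the
kernel's hv_hotadd_state, hv_hotadd_gap, has_pfn_is_backed() and
hv_page_offline_check(), not the real definitions:

#include <stdbool.h>
#include <stdio.h>

struct gap { unsigned long start_pfn, end_pfn; };
struct has {
        unsigned long start_pfn, end_pfn;               /* whole HAS range */
        unsigned long covered_start_pfn, covered_end_pfn; /* populated part */
        const struct gap *gaps;
        int nr_gaps;
};

/* Mirrors has_pfn_is_backed(): inside the covered range and not in a gap. */
static bool pfn_is_backed(const struct has *has, unsigned long pfn)
{
        if (pfn < has->covered_start_pfn || pfn >= has->covered_end_pfn)
                return false;
        for (int i = 0; i < has->nr_gaps; i++)
                if (pfn >= has->gaps[i].start_pfn && pfn < has->gaps[i].end_pfn)
                        return false;
        return true;
}

/*
 * Mirrors hv_page_offline_check(): count only the backed PFNs that fall
 * inside some hot-add region; PFNs outside every region are skipped.
 */
static unsigned long offline_check(const struct has *regions, int nr,
                                   unsigned long start_pfn,
                                   unsigned long nr_pages)
{
        unsigned long pfn = start_pfn, count = 0;

        while (pfn < start_pfn + nr_pages) {
                bool found = false;

                for (int i = 0; i < nr; i++) {
                        const struct has *has = &regions[i];

                        while (pfn >= has->start_pfn && pfn < has->end_pfn &&
                               pfn < start_pfn + nr_pages) {
                                found = true;
                                if (pfn_is_backed(has, pfn))
                                        count++;
                                pfn++;
                        }
                }
                if (!found)     /* boot-time memory: not in any HAS */
                        pfn++;
        }
        return count;
}

int main(void)
{
        /*
         * One region of 100 PFNs: only [10, 60) is covered, with a gap at
         * [20, 30), so 40 pages are actually backed.
         */
        const struct gap gaps[] = { { 20, 30 } };
        const struct has regions[] = {
                { .start_pfn = 0, .end_pfn = 100,
                  .covered_start_pfn = 10, .covered_end_pfn = 60,
                  .gaps = gaps, .nr_gaps = 1 },
        };

        /* Offlining the whole block subtracts 40, not the full 100. */
        printf("%lu\n", offline_check(regions, 1, 0, 100));

        /* A range that no HAS covers (boot-time memory) contributes 0. */
        printf("%lu\n", offline_check(regions, 1, 200, 100));
        return 0;
}

Compiled with e.g. cc -o model model.c, this prints 40 and then 0:
offlining the partly populated region subtracts only the 40 backed pages
(bug 2), and offlining boot-time memory that no HAS covers subtracts
nothing (bug 1), whereas the old code subtracted the full mem->nr_pages in
both cases.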