summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/huge_memory.c7
-rw-r--r--mm/memcontrol.c14
-rw-r--r--mm/memory_hotplug.c9
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mmu_notifier.c79
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/pagewalk.c70
7 files changed, 101 insertions, 82 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03a89a2f464b..362c329b83fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm,
pte_unmap(pte);
spin_lock(&mm->page_table_lock);
BUG_ON(!pmd_none(*pmd));
- set_pmd_at(mm, address, pmd, _pmd);
+ /*
+ * We can only use set_pmd_at when establishing
+ * hugepmds and never for establishing regular pmds that
+ * points to regular pagetables. Use pmd_populate for that
+ */
+ pmd_populate(mm, pmd, pmd_pgtable(_pmd));
spin_unlock(&mm->page_table_lock);
anon_vma_unlock_write(vma->anon_vma);
goto out;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cb1c9dedf9b6..010d6c14129a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4108,8 +4108,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
if (mem_cgroup_disabled())
return NULL;
- VM_BUG_ON(PageSwapCache(page));
-
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
VM_BUG_ON(!PageTransHuge(page));
@@ -4205,6 +4203,18 @@ void mem_cgroup_uncharge_page(struct page *page)
if (page_mapped(page))
return;
VM_BUG_ON(page->mapping && !PageAnon(page));
+ /*
+ * If the page is in swap cache, uncharge should be deferred
+ * to the swap path, which also properly accounts swap usage
+ * and handles memcg lifetime.
+ *
+ * Note that this check is not stable and reclaim may add the
+ * page to swap cache at any time after this. However, if the
+ * page is not in swap cache by the time page->mapcount hits
+ * 0, there won't be any page table references to the swap
+ * slot, and reclaim will free it and not actually write the
+ * page to disk.
+ */
if (PageSwapCache(page))
return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a221fac1f47d..1ad92b46753e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
start = phys_start_pfn << PAGE_SHIFT;
size = nr_pages * PAGE_SIZE;
ret = release_mem_region_adjustable(&iomem_resource, start, size);
- if (ret)
- pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n",
- start, start + size - 1, ret);
+ if (ret) {
+ resource_size_t endres = start + size - 1;
+
+ pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
+ &start, &endres, ret);
+ }
sections_to_remove = nr_pages / PAGES_PER_SECTION;
for (i = 0; i < sections_to_remove; i++) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 27ed22579fd9..b1f57501de9c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte = arch_make_huge_pte(pte, vma, new, 0);
}
#endif
- flush_cache_page(vma, addr, pte_pfn(pte));
+ flush_dcache_page(new);
set_pte_at(mm, addr, ptep, pte);
if (PageHuge(new)) {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index be04122fb277..6725ff183374 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm)
int id;
/*
- * srcu_read_lock() here will block synchronize_srcu() in
- * mmu_notifier_unregister() until all registered
- * ->release() callouts this function makes have
- * returned.
+ * SRCU here will block mmu_notifier_unregister until
+ * ->release returns.
*/
id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
+ /*
+ * If ->release runs before mmu_notifier_unregister it must be
+ * handled, as it's the only way for the driver to flush all
+ * existing sptes and stop the driver from establishing any more
+ * sptes before all the pages in the mm are freed.
+ */
+ if (mn->ops->release)
+ mn->ops->release(mn, mm);
+ srcu_read_unlock(&srcu, id);
+
spin_lock(&mm->mmu_notifier_mm->lock);
while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
mn = hlist_entry(mm->mmu_notifier_mm->list.first,
struct mmu_notifier,
hlist);
-
/*
- * Unlink. This will prevent mmu_notifier_unregister()
- * from also making the ->release() callout.
+ * We arrived before mmu_notifier_unregister so
+ * mmu_notifier_unregister will do nothing other than to wait
+ * for ->release to finish and for mmu_notifier_unregister to
+ * return.
*/
hlist_del_init_rcu(&mn->hlist);
- spin_unlock(&mm->mmu_notifier_mm->lock);
-
- /*
- * Clear sptes. (see 'release' description in mmu_notifier.h)
- */
- if (mn->ops->release)
- mn->ops->release(mn, mm);
-
- spin_lock(&mm->mmu_notifier_mm->lock);
}
spin_unlock(&mm->mmu_notifier_mm->lock);
/*
- * All callouts to ->release() which we have done are complete.
- * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
- */
- srcu_read_unlock(&srcu, id);
-
- /*
- * mmu_notifier_unregister() may have unlinked a notifier and may
- * still be calling out to it. Additionally, other notifiers
- * may have been active via vmtruncate() et. al. Block here
- * to ensure that all notifier callouts for this mm have been
- * completed and the sptes are really cleaned up before returning
- * to exit_mmap().
+ * synchronize_srcu here prevents mmu_notifier_release from returning to
+ * exit_mmap (which would proceed with freeing all pages in the mm)
+ * until the ->release method returns, if it was invoked by
+ * mmu_notifier_unregister.
+ *
+ * The mmu_notifier_mm can't go away from under us because one mm_count
+ * is held by exit_mmap.
*/
synchronize_srcu(&srcu);
}
@@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
BUG_ON(atomic_read(&mm->mm_count) <= 0);
- spin_lock(&mm->mmu_notifier_mm->lock);
if (!hlist_unhashed(&mn->hlist)) {
+ /*
+ * SRCU here will force exit_mmap to wait for ->release to
+ * finish before freeing the pages.
+ */
int id;
+ id = srcu_read_lock(&srcu);
/*
- * Ensure we synchronize up with __mmu_notifier_release().
+ * exit_mmap will block in mmu_notifier_release to guarantee
+ * that ->release is called before freeing the pages.
*/
- id = srcu_read_lock(&srcu);
-
- hlist_del_rcu(&mn->hlist);
- spin_unlock(&mm->mmu_notifier_mm->lock);
-
if (mn->ops->release)
mn->ops->release(mn, mm);
+ srcu_read_unlock(&srcu, id);
+ spin_lock(&mm->mmu_notifier_mm->lock);
/*
- * Allow __mmu_notifier_release() to complete.
+ * Can not use list_del_rcu() since __mmu_notifier_release
+ * can delete it before we hold the lock.
*/
- srcu_read_unlock(&srcu, id);
- } else
+ hlist_del_init_rcu(&mn->hlist);
spin_unlock(&mm->mmu_notifier_mm->lock);
+ }
/*
- * Wait for any running method to finish, including ->release() if it
- * was run by __mmu_notifier_release() instead of us.
+ * Wait for any running method to finish, of course including
+ * ->release if it was run by mmu_notifier_relase instead of us.
*/
synchronize_srcu(&srcu);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 98cbdf6e5532..378a15bcd649 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5158,7 +5158,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end,
for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
if (poison)
memset((void *)pos, poison, PAGE_SIZE);
- free_reserved_page(virt_to_page(pos));
+ free_reserved_page(virt_to_page((void *)pos));
}
if (pages && s)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
return 0;
}
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
- struct vm_area_struct *vma;
-
- /* We don't need vma lookup at all. */
- if (!walk->hugetlb_entry)
- return NULL;
-
- VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
- vma = find_vma(walk->mm, addr);
- if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
- return vma;
-
- return NULL;
-}
-
#else /* CONFIG_HUGETLB_PAGE */
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
- return NULL;
-}
-
static int walk_hugetlb_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
if (!walk->mm)
return -EINVAL;
+ VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
pgd = pgd_offset(walk->mm, addr);
do {
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = NULL;
next = pgd_addr_end(addr, end);
/*
- * handle hugetlb vma individually because pagetable walk for
- * the hugetlb page is dependent on the architecture and
- * we can't handled it in the same manner as non-huge pages.
+ * This function was not intended to be vma based.
+ * But there are vma special cases to be handled:
+ * - hugetlb vma's
+ * - VM_PFNMAP vma's
*/
- vma = hugetlb_vma(addr, walk);
+ vma = find_vma(walk->mm, addr);
if (vma) {
- if (vma->vm_end < next)
+ /*
+ * There are no page structures backing a VM_PFNMAP
+ * range, so do not allow split_huge_page_pmd().
+ */
+ if ((vma->vm_start <= addr) &&
+ (vma->vm_flags & VM_PFNMAP)) {
next = vma->vm_end;
+ pgd = pgd_offset(walk->mm, next);
+ continue;
+ }
/*
- * Hugepage is very tightly coupled with vma, so
- * walk through hugetlb entries within a given vma.
+ * Handle hugetlb vma individually because pagetable
+ * walk for the hugetlb page is dependent on the
+ * architecture and we can't handled it in the same
+ * manner as non-huge pages.
*/
- err = walk_hugetlb_range(vma, addr, next, walk);
- if (err)
- break;
- pgd = pgd_offset(walk->mm, next);
- continue;
+ if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+ is_vm_hugetlb_page(vma)) {
+ if (vma->vm_end < next)
+ next = vma->vm_end;
+ /*
+ * Hugepage is very tightly coupled with vma,
+ * so walk through hugetlb entries within a
+ * given vma.
+ */
+ err = walk_hugetlb_range(vma, addr, next, walk);
+ if (err)
+ break;
+ pgd = pgd_offset(walk->mm, next);
+ continue;
+ }
}
if (pgd_none_or_clear_bad(pgd)) {