summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/mm.h3
-rw-r--r--mm/gup.c200
-rw-r--r--mm/hugetlb.c4
4 files changed, 182 insertions, 27 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 54c317c8355f..ea35263eb76b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..20ec56f8e2bb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1536,7 +1536,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
+
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
diff --git a/mm/gup.c b/mm/gup.c
index 75029649baca..22291db50013 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);
+#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
+
#ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+ long i;
+ struct vm_area_struct *vma_prev = NULL;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct vm_area_struct *vma = vmas[i];
+
+ if (vma == vma_prev)
+ continue;
+
+ vma_prev = vma;
+
+ if (vma_is_fsdax(vma))
+ return true;
+ }
+ return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+ return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+ /*
+ * We want to make sure we allocate the new page from the same node
+ * as the source page.
+ */
+ int nid = page_to_nid(page);
+ /*
+ * Trying to allocate a page for migration. Ignore allocation
+ * failure warnings. We don't force __GFP_THISNODE here because
+ * this node here is the node where we have CMA reservation and
+ * in some case these nodes will have really less non movable
+ * allocation memory.
+ */
+ gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+ if (PageHighMem(page))
+ gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(page);
+ /*
+ * We don't want to dequeue from the pool because pool pages will
+ * mostly be from the CMA region.
+ */
+ return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ }
+#endif
+ if (PageTransHuge(page)) {
+ struct page *thp;
+ /*
+ * ignore allocation failure warnings
+ */
+ gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+ /*
+ * Remove the movable mask so that we don't allocate from
+ * CMA area again.
+ */
+ thp_gfpmask &= ~__GFP_MOVABLE;
+ thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+ if (!thp)
+ return NULL;
+ prep_transhuge_page(thp);
+ return thp;
+ }
+
+ return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+ unsigned int gup_flags,
+ struct page **pages,
+ struct vm_area_struct **vmas)
+{
+ long i;
+ bool drain_allow = true;
+ bool migrate_allow = true;
+ LIST_HEAD(cma_page_list);
+
+check_again:
+ for (i = 0; i < nr_pages; i++) {
+ /*
+ * If we get a page from the CMA zone, since we are going to
+ * be pinning these entries, we might as well move them out
+ * of the CMA zone if possible.
+ */
+ if (is_migrate_cma_page(pages[i])) {
+
+ struct page *head = compound_head(pages[i]);
+
+ if (PageHuge(head)) {
+ isolate_huge_page(head, &cma_page_list);
+ } else {
+ if (!PageLRU(head) && drain_allow) {
+ lru_add_drain_all();
+ drain_allow = false;
+ }
+
+ if (!isolate_lru_page(head)) {
+ list_add_tail(&head->lru, &cma_page_list);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON +
+ page_is_file_cache(head),
+ hpage_nr_pages(head));
+ }
+ }
+ }
+ }
+
+ if (!list_empty(&cma_page_list)) {
+ /*
+ * drop the above get_user_pages reference.
+ */
+ for (i = 0; i < nr_pages; i++)
+ put_page(pages[i]);
+
+ if (migrate_pages(&cma_page_list, new_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ /*
+ * some of the pages failed migration. Do get_user_pages
+ * without migration.
+ */
+ migrate_allow = false;
+
+ if (!list_empty(&cma_page_list))
+ putback_movable_pages(&cma_page_list);
+ }
+ /*
+ * We did migrate all the pages, Try to get the page references again
+ * migrating any new CMA pages which we failed to isolate earlier.
+ */
+ nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+ if ((nr_pages > 0) && migrate_allow) {
+ drain_allow = true;
+ goto check_again;
+ }
+ }
+
+ return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+ unsigned int gup_flags,
+ struct page **pages,
+ struct vm_area_struct **vmas)
+{
+ return nr_pages;
+}
+#endif
+
/*
* This is the same as get_user_pages() in that it assumes we are
* operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
* Contrast this to iov_iter_get_pages() usages which are transient.
*/
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas_arg)
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas_arg)
{
struct vm_area_struct **vmas = vmas_arg;
- struct vm_area_struct *vma_prev = NULL;
+ unsigned long flags;
long rc, i;
if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
return -ENOMEM;
}
+ flags = memalloc_nocma_save();
rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+ memalloc_nocma_restore(flags);
+ if (rc < 0)
+ goto out;
- for (i = 0; i < rc; i++) {
- struct vm_area_struct *vma = vmas[i];
-
- if (vma == vma_prev)
- continue;
-
- vma_prev = vma;
-
- if (vma_is_fsdax(vma))
- break;
- }
-
- /*
- * Either get_user_pages() failed, or the vma validation
- * succeeded, in either case we don't need to put_page() before
- * returning.
- */
- if (i >= rc)
+ if (check_dax_vmas(vmas, rc)) {
+ for (i = 0; i < rc; i++)
+ put_page(pages[i]);
+ rc = -EOPNOTSUPP;
goto out;
+ }
- for (i = 0; i < rc; i++)
- put_page(pages[i]);
- rc = -EOPNOTSUPP;
+ rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
out:
if (vmas != vmas_arg)
kfree(vmas);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0c7848fccf93..97b1e0290c66 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1587,8 +1587,8 @@ out_unlock:
return page;
}
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask)
{
struct page *page;