diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 73 |
1 files changed, 56 insertions, 17 deletions
@@ -44,6 +44,23 @@ static void hpage_pincount_sub(struct page *page, int refs) atomic_sub(refs, compound_pincount_ptr(page)); } +/* Equivalent to calling put_page() @refs times. */ +static void put_page_refs(struct page *page, int refs) +{ +#ifdef CONFIG_DEBUG_VM + if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page)) + return; +#endif + + /* + * Calling put_page() for each ref is unnecessarily slow. Only the last + * ref needs a put_page(). + */ + if (refs > 1) + page_ref_sub(page, refs - 1); + put_page(page); +} + /* * Return the compound head page with ref appropriately incremented, * or NULL if that failed. @@ -56,6 +73,21 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) return NULL; if (unlikely(!page_cache_add_speculative(head, refs))) return NULL; + + /* + * At this point we have a stable reference to the head page; but it + * could be that between the compound_head() lookup and the refcount + * increment, the compound page was split, in which case we'd end up + * holding a reference on a page that has nothing to do with the page + * we were given anymore. + * So now that the head page is stable, recheck that the pages still + * belong together. + */ + if (unlikely(compound_head(page) != head)) { + put_page_refs(head, refs); + return NULL; + } + return head; } @@ -96,6 +128,14 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, return NULL; /* + * CAUTION: Don't use compound_head() on the page before this + * point, the result won't be stable. + */ + page = try_get_compound_head(page, refs); + if (!page) + return NULL; + + /* * When pinning a compound page of order > 1 (which is what * hpage_pincount_available() checks for), use an exact count to * track it, via hpage_pincount_add/_sub(). @@ -103,15 +143,10 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, * However, be sure to *also* increment the normal page refcount * field at least once, so that the page really is pinned. 
*/ - if (!hpage_pincount_available(page)) - refs *= GUP_PIN_COUNTING_BIAS; - - page = try_get_compound_head(page, refs); - if (!page) - return NULL; - if (hpage_pincount_available(page)) hpage_pincount_add(page, refs); + else + page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, orig_refs); @@ -135,14 +170,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - VM_BUG_ON_PAGE(page_ref_count(page) < refs, page); - /* - * Calling put_page() for each ref is unnecessarily slow. Only the last - * ref needs a put_page(). - */ - if (refs > 1) - page_ref_sub(page, refs - 1); - put_page(page); + put_page_refs(page, refs); } /** @@ -392,6 +420,17 @@ void unpin_user_pages(struct page **pages, unsigned long npages) } EXPORT_SYMBOL(unpin_user_pages); +/* + * Set the MMF_HAS_PINNED if not set yet; after set it'll be there for the mm's + * lifecycle. Avoid setting the bit unless necessary, or it might cause write + * cache bouncing on large SMP machines for concurrent pinned gups. + */ +static inline void mm_set_has_pinned_flag(unsigned long *mm_flags) +{ + if (!test_bit(MMF_HAS_PINNED, mm_flags)) + set_bit(MMF_HAS_PINNED, mm_flags); +} + #ifdef CONFIG_MMU static struct page *no_page_table(struct vm_area_struct *vma, unsigned int flags) @@ -1293,7 +1332,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, } if (flags & FOLL_PIN) - atomic_set(&mm->has_pinned, 1); + mm_set_has_pinned_flag(&mm->flags); /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior @@ -2614,7 +2653,7 @@ static int internal_get_user_pages_fast(unsigned long start, return -EINVAL; if (gup_flags & FOLL_PIN) - atomic_set(&current->mm->has_pinned, 1); + mm_set_has_pinned_flag(&current->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(&current->mm->mmap_lock); |