 mm/internal.h | 23
 mm/rmap.c     | 66
 2 files changed, 68 insertions(+), 21 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 14f358d068ac..9e62c8b952b8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -644,14 +644,10 @@ folio_within_vma(struct folio *folio, struct vm_area_struct *vma)
* mlock is usually called at the end of page_add_*_rmap(), munlock at
* the end of page_remove_rmap(); but new anon folios are managed by
* folio_add_lru_vma() calling mlock_new_folio().
- *
- * @compound is used to include pmd mappings of THPs, but filter out
- * pte mappings of THPs, which cannot be consistently counted: a pte
- * mapping of the THP head cannot be distinguished by the page alone.
*/
void mlock_folio(struct folio *folio);
static inline void mlock_vma_folio(struct folio *folio,
- struct vm_area_struct *vma, bool compound)
+ struct vm_area_struct *vma)
{
/*
* The VM_SPECIAL check here serves two purposes.
@@ -661,17 +657,24 @@ static inline void mlock_vma_folio(struct folio *folio,
* file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
* still be set while VM_SPECIAL bits are added: so ignore it then.
*/
- if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
- (compound || !folio_test_large(folio)))
+ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED))
mlock_folio(folio);
}
void munlock_folio(struct folio *folio);
static inline void munlock_vma_folio(struct folio *folio,
- struct vm_area_struct *vma, bool compound)
+ struct vm_area_struct *vma)
{
- if (unlikely(vma->vm_flags & VM_LOCKED) &&
- (compound || !folio_test_large(folio)))
+ /*
+ * Ideally, munlock should only happen when some page of the
+ * folio is unmapped from this VMA, leaving the folio no longer
+ * fully mapped there.
+ *
+ * But it is not easy to confirm that is the case, so always
+ * munlock the folio and let page reclaim correct it if that
+ * was wrong.
+ */
+ if (unlikely(vma->vm_flags & VM_LOCKED))
munlock_folio(folio);
}
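
As an aside, the simplified test that mlock_vma_folio() now applies is easy to model outside the kernel. A minimal userspace sketch, assuming hypothetical flag values (the real VM_* bits and the composition of VM_SPECIAL vary by kernel version and config):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical flag bits, for illustration only. */
#define VM_LOCKED	0x0001UL
#define VM_IO		0x0002UL
#define VM_PFNMAP	0x0004UL
#define VM_SPECIAL	(VM_IO | VM_PFNMAP)

/* Mirrors the post-patch test: mlock only plain VM_LOCKED VMAs. */
static bool should_mlock(unsigned long vm_flags)
{
	return (vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
}

int main(void)
{
	printf("%d\n", should_mlock(VM_LOCKED));		/* 1 */
	printf("%d\n", should_mlock(VM_LOCKED | VM_IO));	/* 0: special mapping */
	printf("%d\n", should_mlock(VM_PFNMAP));		/* 0: not locked */
	return 0;
}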
diff --git a/mm/rmap.c b/mm/rmap.c
index d24e2c36372e..c6bb2339e35b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -798,6 +798,7 @@ struct folio_referenced_arg {
unsigned long vm_flags;
struct mem_cgroup *memcg;
};
+
/*
* arg: folio_referenced_arg will be passed
*/
@@ -807,17 +808,33 @@ static bool folio_referenced_one(struct folio *folio,
struct folio_referenced_arg *pra = arg;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
int referenced = 0;
+ unsigned long start = address, ptes = 0;
while (page_vma_mapped_walk(&pvmw)) {
address = pvmw.address;
- if ((vma->vm_flags & VM_LOCKED) &&
- (!folio_test_large(folio) || !pvmw.pte)) {
- /* Restore the mlock which got missed */
- mlock_vma_folio(folio, vma, !pvmw.pte);
- page_vma_mapped_walk_done(&pvmw);
- pra->vm_flags |= VM_LOCKED;
- return false; /* To break the loop */
+ if (vma->vm_flags & VM_LOCKED) {
+ if (!folio_test_large(folio) || !pvmw.pte) {
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma);
+ page_vma_mapped_walk_done(&pvmw);
+ pra->vm_flags |= VM_LOCKED;
+ return false; /* To break the loop */
+ }
+ /*
+ * A large folio that is fully mapped to the VMA is
+ * handled after the pvmw loop.
+ *
+ * A large folio crossing VMA boundaries is expected
+ * to be picked up by page reclaim, but references to
+ * pages inside the range of a VM_LOCKED vma must be
+ * skipped: page reclaim should only count references
+ * to pages outside the VM_LOCKED range.
+ */
+ ptes++;
+ pra->mapcount--;
+ continue;
}
if (pvmw.pte) {
@@ -842,6 +859,23 @@ static bool folio_referenced_one(struct folio *folio,
pra->mapcount--;
}
+ if ((vma->vm_flags & VM_LOCKED) &&
+ folio_test_large(folio) &&
+ folio_within_vma(folio, vma)) {
+ unsigned long s_align, e_align;
+
+ s_align = ALIGN_DOWN(start, PMD_SIZE);
+ e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
+
+ /* folio doesn't cross a page table boundary and is fully mapped */
+ if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma);
+ pra->vm_flags |= VM_LOCKED;
+ return false; /* To break the loop */
+ }
+ }
+
if (referenced)
folio_clear_idle(folio);
if (folio_test_clear_young(folio))
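
The s_align/e_align test above is plain address arithmetic: the folio lies under a single page table exactly when its first and last byte round down to the same PMD_SIZE boundary. A standalone sketch of the same check, assuming a 2M PMD_SIZE (the x86_64 value) and made-up addresses:

#include <stdio.h>

#define PMD_SIZE	(1UL << 21)	/* 2M, assumed x86_64 value */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

static int same_page_table(unsigned long start, unsigned long size)
{
	unsigned long s_align = ALIGN_DOWN(start, PMD_SIZE);
	unsigned long e_align = ALIGN_DOWN(start + size - 1, PMD_SIZE);

	return s_align == e_align;
}

int main(void)
{
	unsigned long size = 16 * 4096;	/* a 64K folio */

	/* Mapped well inside one 2M-aligned range: same page table. */
	printf("%d\n", same_page_table(0x210000UL, size));	/* 1 */

	/* Mapped so that it straddles the 0x400000 boundary. */
	printf("%d\n", same_page_table(0x3f8000UL, size));	/* 0 */
	return 0;
}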
@@ -1254,7 +1288,14 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
(folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) &&
PageAnonExclusive(page), folio);
- mlock_vma_folio(folio, vma, compound);
+ /*
+ * A large folio should only be mlocked if it is fully mapped
+ * to the VMA, and that is not easy to check here. So only
+ * mlock normal 4K folios and leave large folios to page
+ * reclaim.
+ */
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
}
/**
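
The same "!folio_test_large(folio)" guard recurs in folio_add_file_rmap_range() and try_to_unmap_one() below. A toy userspace model of the policy it encodes (the struct, names, and order field are hypothetical; the kernel's struct folio is quite different):

#include <stdbool.h>
#include <stdio.h>

/* Toy folio for illustration only; not the kernel's struct folio. */
struct toy_folio {
	unsigned int order;	/* 0 == single 4K page */
};

static bool toy_folio_test_large(const struct toy_folio *folio)
{
	return folio->order > 0;
}

/*
 * Post-patch policy at rmap-add time: mlock small folios immediately;
 * defer large folios to page reclaim, which can tell whether they are
 * fully mapped to a VM_LOCKED VMA.
 */
static bool mlock_now(const struct toy_folio *folio, bool vma_locked)
{
	return vma_locked && !toy_folio_test_large(folio);
}

int main(void)
{
	struct toy_folio small = { .order = 0 }, large = { .order = 4 };

	printf("small: %d\n", mlock_now(&small, true));	/* 1 */
	printf("large: %d\n", mlock_now(&large, true));	/* 0: reclaim decides */
	return 0;
}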
@@ -1354,7 +1395,9 @@ void folio_add_file_rmap_range(struct folio *folio, struct page *page,
if (nr)
__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
- mlock_vma_folio(folio, vma, compound);
+ /* See comments in page_add_anon_rmap() */
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
}
/**
@@ -1465,7 +1508,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
* it's only reliable while mapped.
*/
- munlock_vma_folio(folio, vma, compound);
+ munlock_vma_folio(folio, vma);
}
/*
@@ -1530,7 +1573,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (!(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED)) {
/* Restore the mlock which got missed */
- mlock_vma_folio(folio, vma, false);
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;