author		Matthew Wilcox (Oracle) <willy@infradead.org>	2020-06-27 22:19:08 -0400
committer	Matthew Wilcox (Oracle) <willy@infradead.org>	2022-01-08 00:28:41 -0500
commit		6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 (patch)
tree		19f30971709b643688cada0032b9b33806c633e1 /mm/huge_memory.c
parent		25a8de7f8d970ffa7263bd9d32a08138cd949f17 (diff)
mm: Use multi-index entries in the page cache
We currently store large folios as 2^N consecutive entries.  While this
consumes rather more memory than necessary, it also turns out to be buggy.
A writeback operation which starts within a tail page of a dirty folio will
not write back the folio as the xarray's dirty bit is only set on the head
index.  With multi-index entries, the dirty bit will be found no matter
where in the folio the operation starts.

This does end up simplifying the page cache slightly, although not as much
as I had hoped.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
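As a rough illustration of the multi-index behaviour described above (not part of
this patch): a single order-N entry covers its whole index range, so a mark such as
the dirty bit set on that entry is found by a lookup at any index inside the folio,
whereas with 2^N separate entries only the head index carries it.  The sketch below
uses the generic XArray API with made-up names and values (sketch_xa, an order-2
entry at index 0, XA_MARK_0 standing in for the dirty mark) and assumes
CONFIG_XARRAY_MULTI.

#include <linux/xarray.h>
#include <linux/mm_types.h>

static DEFINE_XARRAY(sketch_xa);

/* Store one order-2 multi-index entry (covers indices 0-3) and mark it. */
static void multi_index_mark_sketch(struct page *head)
{
        XA_STATE_ORDER(xas, &sketch_xa, 0, 2);

        do {
                xas_lock(&xas);
                xas_store(&xas, head);          /* one entry for indices 0-3 */
                xas_set_mark(&xas, XA_MARK_0);  /* stands in for the dirty bit */
                xas_unlock(&xas);
        } while (xas_nomem(&xas, GFP_KERNEL));

        /* The entry and its mark are visible from any index it covers. */
        WARN_ON(xa_load(&sketch_xa, 3) != head);
        WARN_ON(!xa_get_mark(&sketch_xa, 3, XA_MARK_0));
}

With the old representation of one entry per page, the analogous mark lookup at
index 3 would miss, which is exactly the writeback bug the commit message describes.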
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	18
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5483347291c..f58524394dc1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2614,6 +2614,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
         struct page *head = compound_head(page);
         struct deferred_split *ds_queue = get_deferred_split_queue(head);
+        XA_STATE(xas, &head->mapping->i_pages, head->index);
         struct anon_vma *anon_vma = NULL;
         struct address_space *mapping = NULL;
         int extra_pins, ret;
@@ -2652,6 +2653,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                         goto out;
                 }
 
+                xas_split_alloc(&xas, head, compound_order(head),
+                                mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+                if (xas_error(&xas)) {
+                        ret = xas_error(&xas);
+                        goto out;
+                }
+
                 anon_vma = NULL;
                 i_mmap_lock_read(mapping);
 
@@ -2681,13 +2689,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
         /* block interrupt reentry in xa_lock and spinlock */
         local_irq_disable();
         if (mapping) {
-                XA_STATE(xas, &mapping->i_pages, page_index(head));
-
                 /*
                  * Check if the head page is present in page cache.
                  * We assume all tail are present too, if head is there.
                  */
-                xa_lock(&mapping->i_pages);
+                xas_lock(&xas);
+                xas_reset(&xas);
                 if (xas_load(&xas) != head)
                         goto fail;
         }
@@ -2703,6 +2710,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 if (mapping) {
                         int nr = thp_nr_pages(head);
 
+                        xas_split(&xas, head, thp_order(head));
                         if (PageSwapBacked(head)) {
                                 __mod_lruvec_page_state(head, NR_SHMEM_THPS,
                                                         -nr);
@@ -2719,7 +2727,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 spin_unlock(&ds_queue->split_queue_lock);
 fail:
                 if (mapping)
-                        xa_unlock(&mapping->i_pages);
+                        xas_unlock(&xas);
                 local_irq_enable();
                 remap_page(head, thp_nr_pages(head));
                 ret = -EBUSY;
@@ -2733,6 +2741,8 @@ out_unlock:
         if (mapping)
                 i_mmap_unlock_read(mapping);
 out:
+        /* Free any memory we didn't use */
+        xas_nomem(&xas, 0);
         count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
         return ret;
 }
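For reference, the xas_split_alloc()/xas_split() calls added above follow a
preallocate-then-split shape: allocate the nodes the split will need while sleeping
is still allowed, perform the split under the xarray lock where allocation is not,
and afterwards free whatever was not consumed.  The standalone helper below is only
a sketch of that shape under CONFIG_XARRAY_MULTI; split_entry_sketch() is a made-up
name, and the real function above additionally holds the i_mmap lock and the
deferred-split queue lock and runs with interrupts disabled.

#include <linux/xarray.h>

/*
 * Sketch only: split multi-index @entry (size @order) into order-0 entries;
 * assumes @xas was set up with plain XA_STATE() at the entry's index.
 */
static int split_entry_sketch(struct xa_state *xas, void *entry,
                              unsigned int order, gfp_t gfp)
{
        /* Step 1: preallocate everything xas_split() will need (may sleep). */
        xas_split_alloc(xas, entry, order, gfp);
        if (xas_error(xas))
                return xas_error(xas);

        /* Step 2: under the lock, check the entry is still there, then split. */
        xas_lock(xas);
        xas_reset(xas);
        if (xas_load(xas) == entry)
                xas_split(xas, entry, order);   /* consumes the preallocation */
        xas_unlock(xas);

        /* Free any memory we didn't use. */
        xas_nomem(xas, 0);
        return 0;
}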